R/cqmaTools.R

Defines functions getHistoryAC computeTrajTime intersectTraj.intersect plotTrajYear plotTrajbackground crossdata trajOutInterpolation trajinbound trajreachthesea intersectTraj splitRawdata filterTrajHeight

Documented in computeTrajTime crossdata filterTrajHeight getHistoryAC intersectTraj intersectTraj.intersect plotTrajbackground plotTrajYear splitRawdata trajinbound trajOutInterpolation trajreachthesea

#---- NOTES: ----
# a vertical profile is made of jars (12 or 17). Each jar is taken at certain height . A vertical profile corresponds to one flight
# in the lab, each jar is analyzed and gas concentration is observed
# for each jar (height) in the profile, a hysplit trajectory is computed
# each trajectory reach the sea at some point. We 're intereted ONLY in the first point
# for each trajectory' point-over-the-sea, we interpolate a gas concentration

#---- TODO: ----

#---- BACKGROUND ----


#' @title Filter trajectories by height
#' @name filterTrajHeight
#' @author Alber Sanchez, \email{alber.ipia@@inpe.br}
#'
#' @description Check if the trajectories are above the given treshold
#'
#' @param file.vec A character vector. The paths to the input files
#' @param above    A length-1 numeric. The treshold
#' @param cnames   A character vector. The column names of the hysplit files
#' @return         A data frame with one row for each file and 2 columns: The trajectories' path and a boolean indicating if they meet the test
#' @export
filterTrajHeight <- function(file.vec, above, cnames){
  # check trajectories' height and make a vector of those to keep
  #cnames <- HYSPLIT.COLNAMES                                                    # column names of the input file    
  file.dat.list <- files2df(file.vec = file.vec, header = FALSE, 
                             skip = 0, cnames = cnames)
  keep <- vector(mode = "logical", length = length(file.dat.list))
  keep <- lapply(file.dat.list, function(x){if(sum(x$height < above) > 0){return(FALSE)}; return(TRUE)}) # test
  keep <- as.vector(unlist(keep))
  return(data.frame(file.vec, keep))
}



#' @title Split a raw data file.
#' @name splitRawdata
#' @author Alber Sanchez, \email{alber.ipia@@inpe.br}
#'
#' @description Split a single raw data file. The output files are prefixed with 
#' "traj_specs_".
#'
#' @param file.in    A character.Path to a raw data file
#' @param path.out   A character. Path to the folder for storing the resulting files
#' @param colname    A character. The name of a column where the flags are located
#' @param keepFlags  A character vector. Flags to keep in the raw data. i.e. c("...", "..>")
#' @param cnames     A character vector. The name of the columns of the raw data file
#' @param keepCols   A character vector. The column names of the raw data file to keep after filtering
#' @param cnamesTest A character vector. The column names used for testing duplicated rows in the filtered data
#' @return           A list of three. A character with the name of the new file, a data.frame of duplicated or inconsistent rows, and a data.frame of coordinates which were changed because they have the wrong sign
#' @export
splitRawdata <- function(file.in, path.out, colname, keepFlags, cnames, 
                         keepCols, cnamesTest){
  # raw data column names
  #cnames <- RAW.DATA.COLNAMES
  #keepCols <- RAW.DATA.COLNAMES.KEEP                                            # keep these columns
  #cnamesTest <- RAW.DATA.COLNAMES.TESTDUPLICATED
  
  file.dat <- file2df(file.in = file.in, header = FALSE, skip = 0,             # read data
                       cnames = cnames)
  # filter data
  file.dat <- .filterDataframe(df = file.dat, keepCols = keepCols,              # filter using flag attribute
                               flagName = colname, keepFlags = keepFlags)
  # report duplicated rows
  testUnique.vec <- apply(file.dat[, cnamesTest], 
                          MARGIN = 1, 
                          function(x){
                            gsub(" ", "0", paste(unlist(x), collapse = "___"))
                          })
  testUnique.df <- as.data.frame(table(as.vector(testUnique.vec)), 
                                 stringsAsFactors = FALSE)
  dup.df <- testUnique.df[testUnique.df$Freq > 1, ]
  # test lat lon for missing signs
  file.dat.list <- split(file.dat, file.dat$site)
  nsd = 3                                                                       # number of standard deviationto identify an outlier 
  treshold = 10                                                                 # the minimum difference (in SDs) to accept a sign  change in coordinates
  #
  outll <- parallel::mclapply(file.dat.list, 
                              .checklonlat, 
                              nsd = nsd, 
                              treshold = treshold)
  #
  # replace coords
  file.dat <- do.call("rbind", file.dat.list)
  ll.df <- do.call("rbind", outll)
  file.dat[, c("lon", "lat")] <- ll.df[, c("lon", "lat")]
  # store the results
  newfile <- file.path(path.out, paste("traj_specs_", 
                                       basename(file.in), sep = ""), 
                       fsep = .Platform$file.sep)
  utils::write.table(file.dat, file = newfile, col.names = FALSE, 
                     row.names = FALSE, quote = FALSE)
  # report  
  wrongcoords <- ll.df[ll.df$changed == TRUE, c("lon", "lat")] * (-1)
  wrongcoords["idrow"] <- rownames(wrongcoords)
  colnames(dup.df) <- c("shYMDhmflask", "Freq")
  dup.df["idrow"] <- as.numeric(rownames(dup.df))
  return(list(newfile, dup.df, wrongcoords))
}




#' @title Intersect trajectories 
#' @name intersectTraj
#' @author Alber Sanchez, \email{alber.ipia@@inpe.br}
#'
#' @description Find trajectories' first point beyond the limit (e.g. over the sea).
#'
#' @param file.vec A character vector. The paths to the input files
#' @param limit.in A SpatialLinesDataFrame object which is used to intersect the trajectories.
#' @param cnames   A character vector. The name of the columns of the raw data file (hysplit)
#' @param srs      A length-1 character. The spatial reference system
#' @return         A list made of a character vector and a list. The character vector is the path to each trajectory file. The list contains the first row in the trajectory file which lies over the sea
#' @export
intersectTraj <- function(file.vec, limit.in, cnames, srs){
  #cnames <- HYSPLIT.COLNAMES                                                    # column names of the input file  
  #srs <-  sp::CRS(SPATIAL.REFERENCE.SYSTEM)
  
  intersect.dat <- list()
  if(length(file.vec) == 0){
    warning("No input files!")
    return(list(file.vec, intersect.dat))
  }
  traj.dat.list <- files2df(file.vec = file.vec, header = FALSE, 
                             skip = 0, cnames = cnames)
  traj.spl.list <- parallel::mclapply(traj.dat.list, .traj2spLines, crs = sp::CRS(srs))  # get SpatialLines from trajectories
  traj.intersect.list <- parallel::mclapply(traj.spl.list, 
                                            cqmaTools::intersectTraj.intersect, 
                                            g1 = limit.in, 
                                            byid = c(FALSE, TRUE))              # intersetion of trajectories with the limit
  traj.rowid.list <- parallel::mclapply(traj.intersect.list,                    # row id so the initial point of the intersection line in the trajectory
                                        function(x){
                                          if(is.null(x)){return(NULL)};
                                          return(as.numeric(
                                            rownames(methods::slot(x, "coords"))[1]))
                                        }) 
  # get the data from the intersection
  intersect.dat <- parallel::mclapply(1:length(traj.dat.list),
                                      function(x, traj.list, rowid.list){
                                        if(is.null(rowid.list[[x]])){return(NULL)}; 
                                        return(
                                          traj.list[[x]][as.numeric(rowid.list[[x]]) + 1, ]
                                        )
                                      } ,                                         # return the next id after the intersection. This corresponds to the line's point falling beyond the limit (on the sea)
                                      traj.list = traj.dat.list, 
                                      rowid.list = traj.rowid.list)
  return(list(file.vec, intersect.dat))
}




#' @title Check if the trajectories reach beyond the limit (the sea)
#' @name trajreachthesea
#' @author Alber Sanchez, \email{alber.ipia@@inpe.br}
#'
#' @description Check if the trajectories reach beyond the limit (the sea)
#'
#' @param traj.intersections A list made of a character vector and a list. The character vector is the path to each trajectory file while the list contains the first row in the trajectory file which lies over the sea
#' @return                   A data.frame with one row for each file and 2 columns: The trajectories' path and a boolean indicating if they were kept
#' @export
trajreachthesea <- function(traj.intersections){
  file.vec <- unlist(traj.intersections[[1]])
  trajintersect.list <- traj.intersections[[2]]
  keep <- logical()
  if(length(file.vec) == 0){warning("No input files!"); return(data.frame(file.vec, keep))}
  # get the files with at least one row, that is, the trajectories which reach to the sea
  keep <- unlist(lapply(trajintersect.list, is.null))
  keep <- !keep
  return(data.frame(file.vec, keep))
}




#' @title Are trajectories inside boundaries?
#' @name trajinbound
#' @author Alber Sanchez, \email{alber.ipia@@inpe.br}
#'
#' @description Check if the trajectory intersection is in the given boundary
#'
#' @param traj.intersections A list of two obejcts. A character vector and a list of data.frames. The data.frames are made of the rows in the matching trajectory file which first crossed to the sea
#' @param minx               A numeric. The minimum X
#' @param maxx               A numeric. The maximum Y
#' @param miny               A numeric. The minimum Y
#' @param maxy               A numeric. The maximum X
#' @return                   A data frame
#' @export
trajinbound <- function(traj.intersections, minx, maxx, miny, maxy){
  keep <- logical()
  file.vec <- unlist(traj.intersections[[1]])
  trajintersect.list <- traj.intersections[[2]]
  if(length(file.vec) == 0){warning("No input files!"); return(data.frame(file.vec, keep))}
  keeptraj <- sapply(trajintersect.list, is.null)
  keeptraj <- !keeptraj
  kfile.vec <- file.vec[keeptraj]
  ktrajintersect.list <- trajintersect.list[keeptraj]
  ktrajintersect.df <- do.call("rbind", ktrajintersect.list)
  xy.df <- ktrajintersect.df[, c("lon", "lat")]
  colnames(xy.df) <- c("x", "y")
  keep <- .inbound(xy.df = xy.df, minx = minx, maxx = maxx, 
                   miny = miny, maxy = maxy)
  k <- as.data.frame(cbind(kfile.vec, keep), stringsAsFactors = FALSE)
  names(k) <- c("file.vec", "keep" )
  file.vec <- as.data.frame(file.vec, stringsAsFactors = FALSE)
  colnames(file.vec) <- "file.vec"
  file.vec <- merge(x = file.vec, y = k, by = "file.vec", all.x = TRUE)
  file.vec$keep <- as.logical(file.vec$keep)
  return(file.vec)
}




# 
#
#' @title Are trajectories inside the metereological station coverage?
#' @name trajOutInterpolation
#' @author Alber Sanchez, \email{alber.ipia@@inpe.br}
#'
#' @description Check if the trajectories' intersections fall inside the metereological station coverage
#'
#' @param traj.intersections A list made of a character vector and a list. The character vector is the path to each trajectory file while the list contains the first row in the trajectory file which lies over the sea
#' @param stations.df        A data.frame of metereological station data. It must have at least 4 columns: name, longitude, latitude, and file c("name", "lon", "lat", "file")
#' @return                   A data.frame with one row for each file and 2 columns: The trajectories' path and a boolean indicating if meet the test
#' @export
trajOutInterpolation <- function(traj.intersections, stations.df){
  stations.df <- stations.df[order(stations.df$lat),]                           # sort by latitude. This is mandatory
  invalid <- unlist(lapply(traj.intersections[[2]], is.null))                   # mark invalid
  file.vec <- unlist(traj.intersections[[1]])[!invalid]                         # The first list contains the paths to trajectory files
  trajrecords.df <- do.call("rbind", traj.intersections[[2]])                   # The second list contains the intersections. That is, the trajectories' first row over the sea. One row per trajectory
  matchInterval <- .inInterval(val = unlist(trajrecords.df["lat"]),             # which stations should be used for each trajectory interpolation? . This is the match of trajectories to stations for interpolation 
                               vec = unlist(stations.df["lat"]))
  keep <- matchInterval[,1] == TRUE | matchInterval[,2] == TRUE                 
  pres <- cbind(file.vec, as.vector(keep))                                      # who passes the test?
  # match to the original files
  ti1.df <- as.data.frame(traj.intersections[[1]])
  colnames(ti1.df) <- "file.vec"
  res <- merge(ti1.df, pres, by = "file.vec", all = TRUE)
  colnames(res) <- c("file.vec", "keep")
  res["keep"] <- as.logical(res$keep)                                           # R, you motherfucking piece of shit!     
  return(res)
}



#' @title Get the stations' data matching the trajectory records
#' @name crossdata
#' @author Alber Sanchez, \email{alber.ipia@@inpe.br}
#'
#' @description This function takes a trajectory's first point beyond the limit (over the sea) and project its latitude to an straight line made by the corresponding metereological stations
#'
#' @param traj.intersections A list made of a character vector and a list. The character vector is the path to each trajectory file while the list contains the first row in the trajectory file which lies over the sea
#' @param stations.df        A data.frame of metereological station data. It must have at least 4 columns: name, longitude, latitude, and file c("name", "lon", "lat", "file")
#' @param tolerance.sec      A numeric. A tolerance used when comparing dates
#' @param timezone           A character. The time zone. i.e. "GMT"
#' @param searchTranslation  A numeric. The number of seconds for searching metereological station data
#' @return                   A list of numeric. Each number is the interpolation result from the matching stations
#' @export
crossdata <- function(traj.intersections, stations.df, tolerance.sec, timezone, 
                      searchTranslation){
  interpolation.res <- list()
  # sort by latitude. This is mandatory
  stations.df <- stations.df[order(stations.df$lat),]                           
  if (length(traj.intersections[[1]]) == 0)
    return(interpolation.res)
  if (length(traj.intersections[[2]]) == 0)
    return(interpolation.res)
  
  # filter latitudes
  # joins all the data.frames from records into a single one. 
  # NOTE1: it assumes all the data.frames have the same columns, in the same order
  # NOTE2: it assumes a single row on each record. Otherwise the following calculations are WRONG
  
  # The first list contains the paths to trajectory files
  trajfiles <- unlist(traj.intersections[[1]])
  
  # The second list contains the intersections. That is, the trajectories' first row over the sea. One row per trajectory
  trajrecords.df <- do.call("rbind", add_na_df(traj.intersections[[2]]))
 
  stopifnot(nrow(trajrecords.df) == length(traj.intersections[[2]]))
   
  # which stations should be used for each trajectory interpolation?
  # This is the match of trajectories to stations for interpolation
  matchInterval <- .inInterval(val = unlist(trajrecords.df["lat"]),
                               vec = unlist(stations.df["lat"])) 
  out <- matchInterval[,1] == FALSE & matchInterval[,2] == FALSE
  if (sum(out, na.rm = TRUE) > 0)
    warning("Trajectories falling out of interpolation zone:", 
            sum(out, na.rm = TRUE), " \n", 
            paste(trajfiles[which(out)], collapse = " \n"))
  # read station's data from files
  station.dat.list <- list()
  for (i in 1:nrow(stations.df)) {
    station.dat <- utils::read.table(file = as.character(stations.df[i, "stationfile"]), sep = "", header = FALSE)
    colnames(station.dat) <- c("datedec", "V2")
    station.dat.list[[i]] <- station.dat
  }
  # add a column with normal dates instead of decimal year dates
  station.dat.list <- parallel::mclapply(station.dat.list, 
                                         function(x){
                                           x["date"] <- unlist(lapply(unlist(x["datedec"]), .ydec2date)); 
                                           return(as.data.frame(x))
                                         }) # stations' data
  # do the interpolation
  interpolation.res <- lapply(
    1:nrow(trajrecords.df), 
    .crossdata.aux, 
    traj.records = trajrecords.df, 
    traj.data.match = matchInterval, 
    stations = stations.df, 
    stations.dat = station.dat.list, 
    # match station's data X days before the records' data
    searchTranslation = searchTranslation,
    timezone = timezone, 
    tolerance.sec = tolerance.sec
  )
  return(interpolation.res)
}




#' @title Plot profiles and trajectories
#' @name plotTrajbackground
#' @author Alber Sanchez, \email{alber.ipia@@inpe.br}
#'
#' @description Plot the input data into map, section, and profile graphs. These plots are stored in disk
#'
#' @param file.in            A character. The path to a filtered observations file or flag filtered raw data
#' @param path.out           A character. The path to a folder to store the results
#' @param traj.plot          A vector of character. The file path to trajectories to plot additional to those in traj.intersections[[1]]
#' @param traj.interpol      A list of numeric. The interpolated values of teh trajectories over the sea
#' @param traj.intersections A list made of a character vector and a list. The character vector is the path to each trajectory file while the list contains the first row in the trajectory file which lies over the sea
#' @param device             A character. Image format, i.e. PNG
#' @param map.xlim           A numeric vector. Map's min & max longitude
#' @param map.ylim           A numeric vector. Map's min & max latitude
#' @param map.height         A numeirc. Map image size
#' @param map.width          A numeirc. Map image size
#' @param sec.width          A numeric. Crosssection map image size
#' @param sec.height         A numeric. Crosssection map image size
#' @param prof.height        A numeric. Profile image size
#' @param prof.width         A numeric. Profile image size
#' @param nsd                A numeric. Number of standard deviations to use to filter the interpolated data
#' @param maxfm.ppm          A numeric. Maximum number of units away from the central tendency
#' @param stations.df        A data.frame with metereological station data. It must contain at least the columns c("name", "lon", "lat")
#' @param plot2file          A logical or NA. Should plots be stored as files? Use NA to not plot anything
#' @param logger             A log4r object
#' @param trajCnames         A character. Column names of hysplit files
#' @param obsCnames          A character. Filtered observation column names
#' @param profileCnames      A character. Column names of the pofiles
#' @param trajFileMet        A character. Metadata included in the trajectory's file names
#' @return                   A list of objects. A character vector with the paths to the plot files, and a data.frame with merged data
#' @export
plotTrajbackground <- function(file.in, path.out, traj.interpol, 
                                traj.intersections, traj.plot, 
                                device, map.xlim, 
                                map.ylim, map.height, map.width, sec.width, 
                                sec.height, prof.height, prof.width, nsd, 
                                maxfm.ppm, stations.df, plot2file, logger, 
                               trajCnames, obsCnames, profileCnames, trajFileMet){
  ## @param use.backgorund     A character. The type of filter used when calculating the background concentration. The options are c("median", "hard")
  
  if (length(traj.interpol) == 0) {
    warning("No interpolations!")
    return()
  }
  #trajCnames <- HYSPLIT.COLNAMES                                                # column names of the trajectory files
  #obsCnames <- RAW.DATA.COLNAMES.KEEP                                           # filtered observation column names
  #profileCnames <- PROFILE.COLNAMES
  lon <- 0; lat <- 0; filename <- 0; height <- 0; concentration <- 0; type <- 0 # avoid notes during package check
  profil <- 0;file.vec <- 0;  sheight <- 0; profile <- 0; trajlabel <- 0
  #
  # observation data
  #
  raw.df <- file2df(file.in = file.in, header = FALSE,                         # flag filtered raw data
                     skip = 0, cnames = obsCnames) 
  raw.df["profile"] <- apply(raw.df[, profileCnames],                        # add profile column to raw data
                             MARGIN = 1, 
                             function(x){
                               x[[3]] <- formatC(x[[3]], width = 2, flag = "0") # 
                               x[[4]] <- formatC(x[[4]], width = 2, flag = "0")
                               return(gsub(" ", "0", paste(unlist(x), collapse = "_")))
                             })
  #
  # trajectory data
  #
  traj.file.vec <- c(traj.intersections[[1]], traj.plot)                        # all the trajectory file names 
  traj.file.vec <- traj.file.vec[!is.na(traj.file.vec)]                         # remove NAs
  traj.dat.list <- files2df(file.vec = traj.file.vec,                          # read the trajectory files into a list of data.frames 
                             header = FALSE, skip = 0, cnames = trajCnames)
  traj.dat.list <- .listname2data.frame(df.list = traj.dat.list,                # add file name as column
                                        colname = "file.vec")
  traj.dat.df <- do.call("rbind", traj.dat.list)                                # collapse to a single data.frame
  traj.dat.df["profile"] <- .filename2profile(unlist(traj.dat.df["file.vec"]))  # add profile column
  traj.dat.df["trajlabel"] <- .formatTrajname(unlist(traj.dat.df["file.vec"]))  # add a label to order by height in the plot
  #
  # process interpolated data
  #
  interpol.df <- data.frame(traj.intersections[[1]], unlist(traj.interpol), 
                            stringsAsFactors = FALSE)
  names(interpol.df) <- c("file.vec", "interpolated")
  interpol.df["file.vec"] <- basename(as.vector(unlist(interpol.df["file.vec"])))
  interpol.df["profile"] <- .filename2profile(unlist(interpol.df["file.vec"]))  # add profile column
  interpol.df <- cbind(interpol.df, 
                       .trajFilenames2metadata(
                         file.vec = unlist(interpol.df["file.vec"]), 
                         cnames = trajFileMet))
  interpol.df["height"] <- .as.numeric.factor(interpol.df$height)
  #
  # process intersections
  #
  
  intersec.df <- cbind(basename(traj.intersections[[1]]), 
                       do.call("rbind", add_na_df(traj.intersections[[2]])))
  
  intersec.df["filerow"] <- as.numeric(rownames(intersec.df))
  colnames(intersec.df) <- c("file.vec", "V1", "V2", "syear", "smonth",         # rename columns
                             "sday", "shour", "smin", "V8", "V9", 
                             "slat", "slon", "sheight", 
                             "spressure", "filerow")
  intersec.df["syear"] <- unlist(intersec.df["syear"]) + 2000
  intersec.df["profile"] <- .filename2profile(as.vector(unlist(intersec.df["file.vec"])))  # add profile column
  intersec.df["trajlabel"] <- .formatTrajname(as.vector(unlist(intersec.df["file.vec"])))
  #
  # base map
  #
  basemap <- .buildbasemap(stations.df, map.xlim, map.ylim)
  #
  # plot
  #
  profile.vec <- unique(unlist(interpol.df["profile"]))                         # each profile to process below
  filenames <- vector(mode = "character", length = 0)                           # files created on this function
  profile.all <- data.frame()                                                   # keep the profile data  
  for (prof in profile.vec) {
    log4r::debug(logger, paste(" - - processing profile ", prof, sep = ""))
    #
    # merge data
    #
    
    #---- TODO: error ----
    # nrow(prof.obs) == 0 because the naming convention of profile changed????
    prof.obs <- raw.df[raw.df$profile == prof, ]                                # observed data
    if (nrow(prof.obs) < 1) {
      log4r::debug(logger, paste(" - - No observations found for ", prof, sep = ""))
      next()
    }
    
    prof.int <- interpol.df[interpol.df$profile == prof, ]                      # interpolated data
    prof.isec <- intersec.df[intersec.df$profile == prof, ]                     # trajectory data of the intersections
    prof.traj <- traj.dat.df[traj.dat.df$profile == prof, ]                     # all the trajectories of this profile
    # complete interpolation to match the observations
    prof.obs <- merge(x = prof.obs, 
                      y = prof.int[, c("file.vec", "interpolated", "height")], 
                      by = "height", all.x = TRUE)
    colnames(prof.obs)[colnames(prof.obs) == "concentration"] <- "observed"
    #
    # background calculation
    #
    my_data_vec <- as.vector(unlist(prof.obs["interpolated"]))
    if (length(my_data_vec) < 2) {
      stop(sprintf("Profile %s doesn't have enough heights.", prof))
      #next()
    }
    back.df <- .background.softrules(data.vec = my_data_vec, 
                                     nsd = nsd, maxfm.ppm = maxfm.ppm)
    #
    # merge more data
    #
    prof.obs <- cbind(prof.obs, back.df)
    prof.obs <- merge(x = prof.obs, y = subset(prof.isec, select = -profile ), 
                      by = "file.vec", all.x = TRUE)
    profile.all <- rbind(profile.all, prof.obs) # add the partial results to the results
    #
    # plot 1 - map
    #
    file.map <- file.path(path.out, paste(prof, "_map.", device, sep = ""),  # name of the file
                          fsep = .Platform$file.sep)
    
    m <- basemap +                                                              # get the base map
      ggplot2::geom_path(data = prof.traj,                                      # add the trajectories
                         mapping = ggplot2::aes(x = lon, y = lat, 
                                                group = trajlabel, 
                                                colour = trajlabel)) + 
      ggplot2::geom_point(data = prof.traj[1, c("lon", "lat")],                 # add the profile point
                          mapping = ggplot2::aes(x = lon, y = lat, group = NA), 
                          shape = 10, size = 3) + 
      ggplot2::geom_point(data = prof.isec,                                     # add the intersection points
                          mapping = ggplot2::aes(x = slon, y = slat, 
                                                 group = trajlabel, 
                                                 colour = trajlabel)) + 
      #ggplot2::geom_segment(aes(x=-30,y=-40,xend=-30, yend=0), color = "blue", linetype=2, size = 1)+
      #ggplot2::geom_segment(aes(x=-59,y=13,xend=-30, yend=0), color = "blue", linetype=2, size = 1)+
      #ggplot2::geom_segment(aes(x=18.189,y=-34.352,xend=-14.4, yend=-7.967), color = "black", linetype=3, size = 1)+
      #ggplot2::geom_segment(aes(x=-14.4,y=-7.967,xend=-30, yend=0), color = "black", linetype=3, size = 1)+
      ggplot2::labs(x = "Longitude ( graus )", y = "Latitude ( graus )",  color = "trajectory" )+
      ggplot2::guides(fill = ggplot2::guide_legend(reverse=TRUE))
    #
    # plot 2 - cross sections
    #
    file.lonsec <- file.path(path.out, paste(prof, "_sectionlon.", device, sep = ""), fsep = .Platform$file.sep)
    file.latsec <- file.path(path.out, paste(prof, "_sectionlat.", device, sep = ""), fsep = .Platform$file.sep)
    slon <- ggplot2::ggplot(data = prof.traj, 
                            mapping = ggplot2::aes(x = lon, y = height, 
                                                   group = trajlabel, 
                                                   colour = trajlabel)) +
      ggplot2::geom_path() + ggplot2::xlim(map.xlim) +                          # add the trajectories
      ggplot2::labs(x = "longitude", y = "height", color = "trajectory") + 
      ggplot2::geom_point(data = prof.isec,                                     # add the intersections
                          mapping = ggplot2::aes(x = slon, y = sheight, 
                                                 group = trajlabel, 
                                                 colour = trajlabel)) + 
      ggplot2::geom_vline(xintercept = prof.traj[1, "lon"], linetype = "dotted") + # add the flight
      ggplot2::guides(fill = ggplot2::guide_legend(reverse=TRUE))
    slat <- ggplot2::ggplot(data = prof.traj, 
                            mapping = ggplot2::aes(x = lat, y = height, 
                                                   group = trajlabel, 
                                                   colour = trajlabel)) +
      ggplot2::geom_path() + ggplot2::xlim(map.ylim) +                          # add the trajectories
      ggplot2::labs(x = "latitude", y = "height", color = "trajectory") + 
      ggplot2::geom_point(data = prof.isec,                                     # add the intersections
                          mapping = ggplot2::aes(x = slat, y = sheight, 
                                                 group = trajlabel, 
                                                 colour = trajlabel)) + 
      ggplot2::geom_vline(xintercept = prof.traj[1, "lat"], linetype = "dotted") + # add the flight
      ggplot2::guides(fill = ggplot2::guide_legend(reverse=TRUE))
    #
    # plot 3 - profile    
    #
    if(nrow(prof.obs) > 0){
      file.profile <- file.path(path.out, paste(prof, "_profile.", device, sep = ""), fsep = .Platform$file.sep)    
      prof.df <- prof.obs[, c("file.vec", "observed", "interpolated", "background", "height")]
      prof.df["type"] <- ""
      prof.obs.df <- prof.df[, c("file.vec", "observed", "height", "type")]
      prof.int.df <- prof.df[, c("file.vec", "interpolated", "height", "type")]
      prof.bac.df <- prof.df[, c("file.vec", "background", "height", "type")]
      prof.obs.df["type"] <- "observed"
      prof.int.df["type"] <- "interpolated"
      prof.bac.df["type"] <- "background"
      colnames(prof.obs.df)[colnames(prof.obs.df) == "observed"] <- "concentration"
      colnames(prof.int.df)[colnames(prof.int.df) == "interpolated"] <- "concentration"
      colnames(prof.bac.df)[colnames(prof.bac.df) == "background"] <- "concentration"
      prof.df <- rbind(prof.obs.df, prof.int.df, prof.bac.df)
      prof.df <- prof.df[with(prof.df, order( type, -height)), ]                  # sort
      p <- ggplot2::ggplot(data = prof.df, 
                           mapping = ggplot2::aes(x = concentration, 
                                                  y = height, group = type, 
                                                  colour = type)) + 
        ggplot2::geom_path() +
        ggplot2::geom_point()
      #
      # plot
      #
      if(!is.na(plot2file)){
        if(plot2file){
          ggplot2::ggsave(filename = file.map, plot = m, device = device, width = map.width, height = map.height)                     # save the maps to files
          ggplot2::ggsave(filename = file.lonsec, plot = slon, device = device, width = sec.width, height = sec.height)
          ggplot2::ggsave(filename = file.latsec, plot = slat, device = device, width = sec.width, height = sec.height)
          ggplot2::ggsave(filename = file.profile, plot = p, device = device, width = prof.width, height = prof.height)
          filenames <- append(filenames, c(file.map, file.lonsec, file.latsec, file.profile))
        }else{
          print(m)
          print(slon)
          print(slat)
          print(p)
        }
      }
    }
  }
  # write profile data
  file.profile <- file.path(path.out, paste(basename(file.in), "_results.txt", sep = ""), fsep = .Platform$file.sep)    
  if(nrow(profile.all) > 0){
    profile.all <- profile.all[, intersect(colnames(profile.all), 
                                           c("profile", "file.vec", "site", 
                                             "lon", "lat", "height", 
                                             "year", "month", "day", "hour", "min", 
                                             "flask", "eventnumber", 
                                             "observed", "background", "interpolated", "outlier", 
                                             "filerow", "V1", "V2", 
                                             "syear", "smonth", "sday", "shour", "smin", 
                                             "V8", "V9", 
                                             "slon", "slat", "sheight", "spressure"))]
    profile.all <- profile.all[with(profile.all, order( profile, -height)), ]     # order
    rownames(profile.all) <- NULL
    #utils::write.table(profile.all, file = file.profile)
    #filenames <- append(filenames, file.profile)
  }else{
    warning("profile.all is empty")
  }
  
  return(list(filenames, profile.all))
}




#' @title Plot the trajectories grouped by year
#' @name plotTrajYear
#' @author Alber Sanchez, \email{alber.ipia@@inpe.br}
#'
#' @description Plot the trajectories grouped by year
#'
#' @param file.vec     A character vector. The paths to  trajectory files
#' @param path.out     A character. The path to the folder for storing the resulting files
#' @param device       A character. Image format, i.e. PNG
#' @param map.xlim     A numeric vector. Map's min & max longitude
#' @param map.ylim     A numeric vector. Map's min & max latitude
#' @param map.height   A numeirc. Map image size
#' @param map.width    A numeirc. Map image size
#' @param stations.df  A data.frame with metereological station data. It must contain at least the columns c("name", "lon", "lat")
#' @param plot2file    A logical. Should the plot be stored as a file
#' @return             A list. The path to the created files
#' @export
plotTrajYear <- function(file.vec, path.out, device, map.xlim, map.ylim, map.height, map.width, stations.df, plot2file){
  lon <- 0; lat <- 0; filename <- 0                                             # avoid notes during package check
  #
  # process data
  #
  trajCnames <- c("V1", "V2", "year", "month", "day", "hour", "min",            # column names of the trajectory files
                  "V8", "V9", "lat", "lon", "height", "pressure")   
  traj.dat.list <- files2df(file.vec = file.vec, header = FALSE, skip = 0,     # read all the trajectory files into a list of data.frames
                             cnames = trajCnames)
  traj.dat.list <- parallel::mclapply(1:length(traj.dat.list),                  # add file name as a column to each data.frame
                                      function(x, dat.list){
                                        dat.list[[x]]["filename"] <- names(dat.list)[x]
                                        return(dat.list[[x]])
                                      }, 
                                      dat.list = traj.dat.list)
  traj.dat <- do.call("rbind", traj.dat.list)                                  # collapse trajectory data into a single data.frame
  traj.dat["siteyear"] <- unlist(parallel::mclapply(unlist(traj.dat["filename"]),# add new column made of site and year
                                                    function(x){
                                                      paste(unlist(strsplit(x, split = "_"))[1:2], collapse = "_")
                                                    }))
  siteyear.vec <- as.vector(unlist(unique(traj.dat["siteyear"])))
  #
  # base map
  basemap <- .buildbasemap(stations.df, map.xlim, map.ylim)
  #
  # plot
  res <- list()
  for(i in 1:length(siteyear.vec)){
    dtraj.df <- traj.dat[traj.dat$siteyear == siteyear.vec[i], ]
    plot.map <- file.path(path.out, paste(siteyear.vec[i], "_map.", device, sep = ""), fsep = .Platform$file.sep)
    m <- basemap + ggplot2::geom_path(data = dtraj.df, mapping = ggplot2::aes(x = lon, y = lat, group = filename, colour = filename)) + 
      ggplot2::geom_point(data = dtraj.df[1, c("lon", "lat")], mapping = ggplot2::aes(x = lon, y = lat, group = NA), shape = 10, size = 3) + 
      ggplot2::theme(legend.position="none") + 
      ggplot2::ggtitle(label = dtraj.df[1, "siteyear"])
    if(plot2file){
      ggplot2::ggsave(filename = plot.map, plot = m, device = device, width = map.width, height = map.height)
    }else{
      print(m)
    }
    res[[i]] <- plot.map
  }
  return(res)
}



#' @title Invert the parameters of rgeos::gIntersection
#' @name intersectTraj.intersect
#' @author Alber Sanchez, \email{alber.ipia@@inpe.br}
#'
#' @description Invert the parameters of rgeos::gIntersection
#'
#' @param g2   A SP object
#' @param g1   A SP object
#' @param byid A logical vector to pass to rgeos::gIntersection
#' @return The intersection resutl as SP objects
#' @export
intersectTraj.intersect <- function(g2, g1, byid){
  return(rgeos::gIntersection(spgeom1 = g1, spgeom2 = g2, byid = byid))
}




#' @title Compute the trajectory time
#' @name computeTrajTime
#' @author Alber Sanchez, \email{alber.ipia@@inpe.br}
#'
#' @description Compute the trajectory time
#'
#' @param file.vec A vector of character. The paths to the input files
#' @param line.vec A vector. Ids of rows in each file in file.vec
#' @param cnames   A character. Column names of hysplit files
#' @return         A list of difftime (in days)
#' @export
computeTrajTime <- function(file.vec, line.vec, cnames){
  #cnames = HYSPLIT.COLNAMES
  data.list <- files2df(file.vec = file.vec, header = FALSE, skip = 0, cnames = cnames)
  lapply(seq_along(data.list), function(x, data.list, line.vec){
    res <- NA
    adf <- data.list[[x]]
    l <- line.vec[x]
    if(!is.na(l)){
      date.s <- unlist(adf[l, 3:7])
      date.e <- unlist(adf[1, 3:7])
      if(date.s[1] < 100){date.s[1] <- date.s[1] + 2000}
      if(date.e[1] < 100){date.e[1] <- date.e[1] + 2000}
      date.se <- as.data.frame(rbind(date.s, date.e))
      date.se[, 2] <- formatC(date.se[, 2], width = 2, flag = 0)
      date.se[, 3] <- formatC(date.se[, 3], width = 2, flag = 0)
      date.se[, 4] <- formatC(date.se[, 4], width = 2, flag = 0)
      date.se[, 5] <- formatC(date.se[, 5], width = 2, flag = 0)
      date.se[, 6] <- rep("00", time = 2)
      date.s <- as.POSIXct(paste(paste(date.se[1, 1:3], collapse = "-"), paste(date.se[1, 4:6], collapse = ":"), sep = " "))
      date.e <- as.POSIXct(paste(paste(date.se[2, 1:3], collapse = "-"), paste(date.se[2, 4:6], collapse = ":"), sep = " "))
      res <- difftime(time1 = date.e, time2 = date.s, units = "days")
    }
    return(res)
  }, 
  data.list = data.list, 
  line.vec = line.vec
  )
}





#---- BRIEFCASE ----


#' @title Get data from briefcases
#' @name get_os
#' @author Alber Sanchez, \email{alber.ipia@@inpe.br}
#'
#' @description Extract history A and C from collected report files 
#'
#' @param file.in A character. The path to a collect report file. i.e /home/user/PFP_3723_ALF_2010_02_17.txt
#' @return        A data.frame with the results of comamnds "HISTORY> A" and "HISTORY> C"
#' @export
getHistoryAC <- function(file.in){
  profile <- paste(unlist(strsplit(sub("([^.]+)\\.[[:alnum:]]+$", "\\1", 
                                       basename(file.in)), split = "_"))[3:6], collapse = "_")
  file.dat <- readLines(file.in, skipNul = TRUE, warn = FALSE)
  hs <- grep("HISTORY>", file.dat)
  # find the commands in the text
  hA <- grep("^HISTORY> A$", file.dat, ignore.case = TRUE)
  hC <- grep("^HISTORY> C$", file.dat, ignore.case = TRUE)
  if(length(hA) == 0 | length(hC) == 0){
    warning(sprintf("HISTORY A or C not found in: %s", file.in))
    pres <- cbind(as.data.frame(t(rep(NA, time = 16))), profile)
    names(pres) <- c("sample", "plan", "start", "end", "min", "max", "mean", "temperature (C)", "humidity (%RH)", "pressure (mbar)", "planmts", "startmts", "endmts", "minmts", "maxmts", "meanmts", "profile")
    return(pres)
  }
  if(length(hA) > 1 || length(hC) > 1){
    stop(sprintf("Ambiguos HISTORY tag found in: %s", file.in))
  }

  # get the text for HISTORY> A
  hA.dat <- file.dat[(hA + 1):hs[match(hA, hs) + 1]]
  hA.dat <- hA.dat[1:(length(hA.dat) - 1)]                                      # removes the last line
  hA.dat <- gsub("\\(low\\)|\\(high\\)", "", hA.dat)                            # remove instances of (low) and (high)
  # get the text for HISTORY> C
  hC.dat <- file.dat[(hC + 1):hs[match(hC, hs) + 1]]
  hC.dat <- hC.dat[1:(length(hC.dat) - 1)]
  #
  pat <- "*  |*/"                                                                # regular expression' pattern to split 
  #
  hA.dat.split <- strsplit(hA.dat, split = pat)
  hA.mat <- trimws(do.call("rbind", lapply(hA.dat.split, function(x){x <- x[x != ""]})))
  hA.df <- as.data.frame(hA.mat[2:nrow(hA.mat),], stringsAsFactors = FALSE)
  colnames(hA.df) <- trimws(hA.mat[1, ])
  hA.df <- data.matrix(hA.df)
  #
  hC.dat.split <- strsplit(hC.dat, split = pat)
  hC.mat <- trimws(do.call("rbind", lapply(hC.dat.split, function(x){x <- x[x != ""]})))
  hC.df <- as.data.frame(hC.mat[2:nrow(hC.mat),], stringsAsFactors = FALSE)
  colnames(hC.df) <- trimws(hC.mat[1, ])
  hC.df <- data.matrix(hC.df)
  # validation of new briefcase model
  if(colnames(hA.df)[length(colnames(hA.df))] != "mean"){                         # accounts for different names in columns on different models of briefcases
    colnames(hA.df)[length(colnames(hA.df))] <- "mean"
    colnames(hC.df) <- c("sample", "temperature (C)", "pressure (mbar)", "humidity (%RH)")  # re-name to old names
    hC.df <- hC.df[,   c("sample", "temperature (C)", "humidity (%RH)", "pressure (mbar)")]   # re-oprder to match the old briefcases' structure
  }
  # feet to meters
  f2m <- 0.3048
  res <- cbind(merge(hA.df, hC.df, by = "sample"))
  res["planmts"] <- as.vector(res["plan"]) * f2m
  res["startmts"] <- as.vector(res["start"]) * f2m
  res["endmts"] <- as.vector(res["end"]) * f2m
  res["minmts"] <- as.vector(res["min"]) * f2m
  res["maxmts"] <- as.vector(res["max"]) * f2m
  res["meanmts"] <- as.vector(res["mean"]) * f2m
  #
  return(cbind(res, profile))
}
albhasan/cqmaTools documentation built on Dec. 12, 2023, 9:25 a.m.