#' Prediction at new locations based on the fitting results of original dataset
#'
#' Dispatches on the type of \code{fitted}: in-memory input (a \code{ddf}
#' object or a data.frame) is passed to \code{predNew_local}, while a character
#' HDFS path is passed to \code{predNew_mr}.
#'
#' @param fitted
#'     The fitting results of the original dataset. Can be a data.frame in
#'     memory (including subclasses of data.frame), a \code{ddf} object (which
#'     is first combined with \code{recombine(fitted, combRbind)}), or a
#'     character HDFS path which contains all fitting results of the original
#'     dataset.
#' @param newdata
#'     A data.frame includes all locations' longitude, latitude, and elevation,
#'     where the prediction is to be calculated.
#' @param output
#'     The output path of the prediction results on HDFS. It is required when
#'     \code{fitted} is an HDFS path. If \code{fitted} is an in-memory object,
#'     the output should be kept as the default NULL, since the function will
#'     return the results in memory.
#' @param stat_info
#'     The RData on HDFS which contains all station metadata. Make sure
#'     copy the RData of station_info to HDFS first using rhput. It is only
#'     passed on when \code{fitted} is an HDFS path.
#' @param model_control
#'     Should be a list object generated from \code{spacetime.control} function.
#'     The list including all necessary smoothing parameters of nonparametric
#'     fitting.
#' @param cluster_control
#'     Should be a list object generated from \code{mapreduce.control} function.
#'     The list including all necessary Rhipe parameters and also user tunable
#'     MapReduce parameters. It is required when \code{fitted} is an HDFS path.
#'     If \code{fitted} is an in-memory object, this parameter should be kept
#'     as default NULL.
#' @return
#'     For in-memory input, the value returned by \code{predNew_local}. For an
#'     HDFS path, the value returned by \code{predNew_mr}. An error is raised
#'     if \code{fitted} is an HDFS path and \code{output} or
#'     \code{cluster_control} is NULL, or if \code{fitted} is of an unsupported
#'     type.
#' @author
#'     Xiaosu Tong
#' @export
#' @seealso
#'     \code{\link{spacetime.control}}, \code{\link{mapreduce.control}}
#'
#' @examples
#' \dontrun{
#'     mcontrol <- spacetime.control(
#'       vari="resp", time="date", n=576, n.p=12, stat_n=7738, surf = "interpolate",
#'       s.window="periodic", t.window = 241, degree=2, span=0.015, Edeg=2
#'     )
#'     ccontrol <- mapreduce.control(
#'       libLoc= NULL, reduceTask=169, io_sort=128, slow_starts = 0.5,
#'       map_jvm = "-Xmx200m", reduce_jvm = "-Xmx200m",
#'       map_memory = 1024, reduce_memory = 1024,
#'       reduce_input_buffer_percent=0.4, reduce_parallelcopies=10,
#'       reduce_merge_inmem=0, task_io_sort_factor=100,
#'       spill_percent=0.9, reduce_shuffle_input_buffer_percent = 0.8,
#'       reduce_shuffle_merge_percent = 0.4
#'     )
#'     new.grid <- expand.grid(
#'       lon = seq(-126, -67, by = 0.5),
#'       lat = seq(25, 49, by = 0.5)
#'     )
#'     instate <- !is.na(map.where("state", new.grid$lon, new.grid$lat))
#'     new.grid <- new.grid[instate, ]
#'
#'     elev.fit <- spaloess( elev ~ lon + lat,
#'       data = station_info,
#'       degree = 2,
#'       span = 0.015,
#'       distance = "Latlong",
#'       normalize = FALSE,
#'       napred = FALSE,
#'       alltree = FALSE,
#'       family="symmetric",
#'       control=loess.control(surface = "direct")
#'     )
#'     grid.fit <- predloess(
#'       object = elev.fit,
#'       newdata = data.frame(
#'         lon = new.grid$lon,
#'         lat = new.grid$lat
#'       )
#'     )
#'     new.grid$elev2 <- log2(grid.fit + 128)
#'
#'     #if the original fitting results are in memory
#'     fitted <- drsstl(
#'       data=tmax_all,
#'       output=NULL,
#'       stat_info="station_info",
#'       model_control=mcontrol
#'     )
#'     predNewLocs(
#'       fitted = fitted, newdata = new.grid, model_control = mcontrol
#'     )
#'
#'     #if the fitting results are on HDFS
#'     predNewLocs(
#'       fitted="/tmp/output/output_bymth", newdata=new.grid, output = "/tmp",
#'       stat_info="/tmp/station_info.RData", model_control = mcontrol,
#'       cluster_control = ccontrol
#'     )
#' }
predNewLocs <- function(fitted, newdata, output = NULL, stat_info = NULL,
                        model_control = spacetime.control(),
                        cluster_control = NULL) {

  # Use inherits()/is.character() rather than `class(x) == "..."`: objects with
  # several classes (e.g. tibbles) would otherwise give a length > 1 condition,
  # which is an error in recent R versions.
  if (inherits(fitted, "ddf")) {
    # Distributed data frame: combine the pieces before predicting locally.
    rst <- predNew_local(
      original = recombine(fitted, combRbind),
      newdata = newdata,
      mlcontrol = model_control
    )
    return(rst)
  }

  if (inherits(fitted, "data.frame")) {
    rst <- predNew_local(
      original = fitted, newdata = newdata, mlcontrol = model_control
    )
    return(rst)
  }

  if (is.character(fitted)) {
    # HDFS path: the MapReduce job needs somewhere to write and a cluster setup.
    if (is.null(output)) {
      stop("An output path on HDFS should be specified")
    }
    if (is.null(cluster_control)) {
      stop("A cluster control must be specified for data on HDFS")
    }
    predNew_mr(
      newdata = newdata, input = fitted, output = output, info = stat_info,
      mlcontrol = model_control, clcontrol = cluster_control
    )
  } else {
    stop("The input data should be either a data.frame in memory or a HDFS path of input data")
  }
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.