Nothing
#' @title Learning the spatiotemporal properties of
#' a sample data
#' @description Learns both the spatial and the temporal
#' properties of a real sample dataset.
#' @param ppt A 3-column matrix or list containing
#' `x` - eastings, `y` - northing, and `t` - time of occurrence
#' (in the format: `yyyy-mm-dd').
#' @param start_date the start date of the temporal pattern.
#' The date should be in the format `"yyyy-mm-dd"`.
#' The temporal pattern will normally
#' cover 1-year period.
#' @param poly (An sf or S4 object)
#' a polygon shapefile defining the extent of the landscape
#' @param gridSize the size of square grid
#' to use for discretizing the space.
#' Default is: \code{150}.
#' @param n_origin number of locations to serve as
#' origins for walkers. Default:\code{50}.
#' @param p_ratio (an integer) The smaller of the
#' two terms of a Pareto ratio.
#' For example, a value of \code{20}
#' implies a \code{20:80} Pareto ratio.
#' @param s_range A value (in metres), not less than 150,
#' specifying the maximum range of spatial
#' interaction across the space. For example, for 150m,
#' the intervals of spatial interactions are created as
#' \code{(0, 50]}, \code{(50 - 100]}, and \code{(100-150]},
#' representing the "small", "medium", and "large",
#' spatial interaction ranges, respectively. If
#' `s_range` is set as `NULL`, simulation
#' focusses only on generating point pattern with
#' similar spatiotemporal patterns as the sample
#' dataset.
#' @param tolerance Pvalue to use for the extraction of
#' space-time interaction in the sample data. Default
#' value: \code{0.07}.
#' @param crsys (string) the EPSG code of the projection
#' system of the `ppt` coordinates. This only used if
#' `poly` argument is \code{NULL}.
#' See "http://spatialreference.org/" for the list of
#' EPSG codes for different regions of the world.
#' As an example, the EPSG code for the British National Grid
#' projection system is: \code{"EPSG:27700"}.
#' @param show.plot (TRUE or FALSE) Whether to show
#' some displays.
#' @usage stp_learner(ppt, start_date = NULL, poly = NULL,
#' n_origin=50, p_ratio, gridSize = 150, s_range = 150,
#' tolerance = 0.07,
#' crsys = NULL, show.plot = FALSE)
#' @examples
#' \dontrun{
#' #Goal: To learn the ST properties
#' #of a sample data, for the purpose of
#' #simulating the full dataset (see `psim_real`).
#' data(camden_crimes)
#' #subset 'theft' crime
#' theft <- camden_crimes[which(camden_crimes$type ==
#' "Theft"),1:3]
#' #specify the proportion of full data to use
#' sample_size <- 0.3
#' set.seed(1000)
#' dat_sample <- theft[sample(1:nrow(theft),
#' round((sample_size * nrow(theft)), digits=0),
#' replace=FALSE),]
#' #plot(dat_sample$x, dat_sample$y) #preview
#'
#' stp_learner(dat_sample,
#' start_date = NULL, poly = NULL, n_origin=50,
#' p_ratio=20, gridSize = 150,
#' s_range = 150, tolerance = 0.07,
#' crsys = "EPSG:27700",
#' show.plot = FALSE)
#' }
#' @details Returns an object of the class `real_spo`,
#' storing details of the spatiotemporal
#' properties of the sample data learnt.
#' @return an object (list) containing specific spatial
#' and temporal properties of a sample dataset.
#' @references Silverman, B.W., 2018. Density estimation
#' for statistics and data analysis. Routledge.
#' @importFrom dplyr select group_by
#' mutate summarise left_join n arrange
#' desc
#' @importFrom tidyr replace_na
#' @importFrom sp SpatialPoints proj4string
#' @importFrom stats predict loess
#' @importFrom sf st_area st_geometry st_centroid
#' @importFrom ggplot2 aes theme element_text
#' theme_light geom_sf
#' @importFrom spatstat.geom ppp owin
#' @importFrom sparr OS
#' @importFrom tibble rownames_to_column
#' @importFrom ks hpi
#' @export
#'
stp_learner <- function(ppt, start_date = NULL, poly = NULL,
n_origin=50, p_ratio, gridSize = 150,
s_range = 150, tolerance = 0.07,
crsys = NULL,
show.plot = FALSE){
prob <- NRepeat <- NULL
output <- list()
#global var
grid_id <- count <- NULL
origins <- list()
#check spatial range
if(!is.null(s_range)){
if(s_range < 150){
stop("Spatial range ('s_range') cannot be less than 150.")
}
}
#check if start_date is supplied, if not
#extract from the point data.
if(is.null(start_date)){
#min time
min_t <- min(as.Date(ppt[,3]))
ppt_df <- data.frame(ppt)
colnames(ppt_df) <- c("x","y","t")
#check the date field too
t_format <- length(which(date_checker(c(ppt[,3]))==FALSE))
#check time column
if(t_format > 0){
stop("At least one entry of the 'time' column is not in the correct format!")
}
#ensuring data does not exceed 1-year length
if(as.numeric(difftime(max(ppt_df$t), min(ppt_df$t), units="days")) > 366){
stop(paste("Data length is greater than 365 days!",
"Data length has to be less than or equal to 1-year!"))
}
final_start_date <- min_t
} else{
if(date_checker(c(start_date)) == FALSE){
stop("The 'start_date' specified is not in the correct format!")
}
#check the date field too
t_format <- length(which(date_checker(c(ppt[,3]))==FALSE))
#check time column
if(t_format > 0){
stop("At least one entry of the 'time' column is not in the correct format!")
}
#compared start time with min time
min_t <- min(as.Date(ppt[,3]))
start_date <- as.Date(start_date)
min_compared <- length(which(as.Date(ppt[,3]) < start_date))
if(min_compared > 0){
stop(paste("One or more entry(s) in the time column of 'ppt'",
"is less than the 'start_date'! Consider setting 'start_date = NULL'!", sep=" "))
}
ppt_df <- data.frame(ppt)
colnames(ppt_df) <- c("x","y","t")
min_x <- min(ppt_df$x)
min_y <- min(ppt_df$y)
min_xyt <- data.frame(x=min_x,
y=min_y,
t=start_date) #start_date = "2014-12-31"
#combine
ppt_df <- data.frame(rbind(min_xyt, ppt_df))
max_t <- max(ppt_df$t)
#diff between min and max
#ensuring data does not exceed 1-year length
if(as.numeric(difftime(max(ppt_df$t), min(ppt_df$t), units="days")) > 365){
stop(paste("Data length is greater than 365 days!",
"Less than or equal to 1-year data length is required!"))
}
final_start_date <- start_date
}
#Now, learn the temporal pattern and trend from
#the sample dataset
dat_sample_p <- ppt_df %>%
group_by(t) %>%
summarise(n = n())%>%
mutate(time = round(as.numeric(difftime(t, as.Date(min(t)), units="days")),
digits = 0))%>%
as.data.frame()
#create a sub table
time <- data.frame(time=1:365)
t <- as.vector(unlist(time))
suppressMessages(
dat_sample_p <- time %>%
left_join(dat_sample_p) %>%
dplyr::mutate(n = replace_na(n, 0)))
#unique(dat_sample_p$time)
#plot(dat_sample_p$time, dat_sample_p$n, 'l')
#head(datxy)
#--------------------------------
#smoothen and plot
loessData <- data.frame(
x = 1:nrow(dat_sample_p),
y = predict(loess(n~time, dat_sample_p, span = 0.3)),
method = "loess()"
)
#scaling by a factor of 10
#for high intensity data
s_factor <- 20
#--------------------
loessData$y <- loessData$y * s_factor
gtp <- round(loessData$y, digits = 0)
# if(show.plot == TRUE){
# flush.console()
# plot(t, gtp, 'l')
# }
#--------------------
#----------------------------------
#learn the spatial pattern
#import square grid
#----------------------------------
#check if boundary is supplied,
#if null, create an arbitrary boundary
if(is.null(poly)){
ppt_xy <- ppt[,1:2]
#create boundary from points
boundary_ppt <- chull_poly(ppt_xy)
#then define the crs
if(is.null(crsys)){
stop(paste("'crsys' argument cannot be NULL",
"when 'poly' argument is NULL!!",
"Needs to define the 'crsys' argument", sep=" "))
} else {
suppressWarnings(
proj4string(boundary_ppt) <- CRS(crsys))
#warning msg
}
} else {
if(is.na(crs(poly))){
stop("The 'poly' object has no projection, i.e. crs")
}
#first check that 'poly' has
#a projection
if(!is.null(crsys)){
flush.console()
print(paste("Note: The projection system (crs) of 'poly'",
"object is utilized!!", sep=" "))
}
if(is.na(crs(poly))){
stop(paste("'poly' object must have a projection!"))
}
boundary_ppt <- poly
}
#calculate spatial bandwidth
#boundary coord
x_min <- min(extract_coords(boundary_ppt)$X)
x_max <- max(extract_coords(boundary_ppt)$X)
y_min <- min(extract_coords(boundary_ppt)$Y)
y_max <- max(extract_coords(boundary_ppt)$Y)
#compute the univariate spatial bandwidth
#of x and y, and find average.
#bi-variate tends to over-smooth
#due to natural overlaps between
#events of multiple sources
#suppressWarnings(
b1 <- ks::hpi(x=ppt_df[,1])
b2 <- ks::hpi(x=ppt_df[,2])
sbw <- (b1 + b2) / 2
#)
# ppt_df_ppp <- ppp(x=as.numeric(ppt_df$x),
# y=as.numeric(ppt_df$y),
# window = owin(c(x_min, x_max),
# c(y_min, y_max))))
#
# sbw <- OS(ppt_df_ppp)/4
#determine if projection is in metres or feet
#if metres, use 500m2
#if feet, 5000ft2
suppressWarnings(
s4_proj <- proj4string(boundary_ppt))
#warning
# #determine grid size by dividing the area
# #by n_origin #(work to do)
# m_square <- as.numeric(st_area(st_as_sf(boundary_ppt))) / n_origin
#
#
# if(grepl("+units=m", s4_proj, fixed = TRUE) == TRUE){
# #grid_size <- 500
# grid_size <- m_square^(1/2)
# }
# if(grepl("+units=us-ft", s4_proj, fixed = TRUE)==TRUE){
# #grid_size <- 1700
# grid_size <- m_square^(1/2)
# }
#check the crs
if((grepl("+units=m", s4_proj, fixed = TRUE) == FALSE)&
(grepl("+units=us-ft", s4_proj, fixed = TRUE)==FALSE)){
stop(paste("Specified 'crs' not recognized!",
"A 'Metre- or Feet-based' projection is preferred", sep=" "))
}
#create regular grids
#default 250 square metres
grid_sys <- make_grids(poly=boundary_ppt,
size = gridSize, show_output = FALSE)
#warning msg
#plot(grid_sys)
grid_sys$grid_id <- 1:length(grid_sys)
#plot(grid_sys)
#Now overlay points on the grids
ppt_df <- ppt_df %>%
rownames_to_column("id")
x <- as.numeric(ppt_df$x)
y <- as.numeric(ppt_df$y)
t <- as.Date(ppt_df$t)
xyid_ <- cbind(x, y)
#ppt_df$x <- as.numeric(ppt_df$x)
xyid_ppt <- SpatialPoints(xyid_)
proj4string(xyid_ppt) <- crs(grid_sys)
#plot(xyid_ppt, add=TRUE)
grid_sys <- st_as_sf(grid_sys)
xyid_ppt <- st_as_sf(xyid_ppt)
#convert
pnt_grid_intsct <- st_intersects(xyid_ppt, grid_sys, sparse = TRUE)
pnt_grid_intsct <- data.frame(pnt_grid_intsct)
colnames(pnt_grid_intsct) <- c("id", "grid_id")
pnt_grid_intsct$id <- as.character(pnt_grid_intsct$id)
suppressWarnings(
stc <- st_centroid(grid_sys))
#warning msg
ptsxy <- data.frame(cbind(do.call(rbind, st_geometry(stc)),
stc$grid_id))
colnames(ptsxy) <- c("x","y","grid_id")
#join
#Assign the probability value to each grid
#based on its historical events
suppressMessages(
spo <- ppt_df %>%
left_join(pnt_grid_intsct)%>%
group_by(grid_id) %>%
summarise(count = n()) %>%
arrange(desc(count))%>%
mutate(prob = round(count/sum(count),
digits=10)) %>%
left_join(ptsxy) %>%
arrange(prob)%>%
filter(!is.na(grid_id)))
#now randomly select 'n_origin'
#taking into account the 'resistance_feat'
if(length(spo$grid_id) > n_origin){
samp_idx <- as.numeric(sample(spo$grid_id, size = n_origin,
replace = FALSE, prob = spo$prob)) #%>
spo <- spo %>%
filter(grid_id %in% samp_idx)
}
if(length(spo$grid_id) <= n_origin){
spo <- spo #do nothing
}
#Note: 'resistance_feat' has not use here
#but could be added (optionally) to the simulation function
#if a user deems fit.
spo <- spo %>%
dplyr::select(x, y, prob, count, grid_id)
#x <- as.numeric(spo$x)
#y <- as.numeric(spo$y)
#spo_xy <- data.frame(cbind(x,y))
#colnames(spo_xy) <- c("x","y")
#---------------------------------------------------------
#detect space-time signature
#spatial threshold
if(!is.null(s_range)){
s_list <- seq(0, s_range, len=4)
#temporal thresholds
#ppt <- burg_df_samp
#ppt <- burg_df
colnames(ppt) <- c("x", "y", "date")
#checking the st interaction
ppt_proc <- ppt %>%
dplyr::mutate(date = as.Date(substr(date, 1, 10)))%>%
dplyr::select(x, y,date)%>%
data.frame()
flush.console()
print("****Detecting spatiotemporal signatures:")
myoutput2 <- NRepeat(x = ppt_proc$x, y = ppt_proc$y, time = ppt_proc$date,
sds = s_list,
tds = 1:30,
#tds = seq(1, 30, 2),
s_include.lowest = FALSE, s_right = FALSE, # include leftmost and include right most
t_include.lowest = FALSE, t_right = FALSE)#) # include exclude leftmost and include right most
res <- myoutput2$pvalues
c_005 <- list()
for(h in 1:nrow(res)){ #h<-3
ids <- as.numeric(which(res[h,] <= tolerance))
if(length(ids)!=0){
c_005[[h]] <- ids
names(c_005)[h] <- rownames(res)[h]
}
if(length(ids)==0){
c_005[[h]] <- "NA"
names(c_005)[h] <- rownames(res)[h]
}
}
c_005
st_band <- c_005
#}
if(length(st_band) != 0){
st_band <- st_band
}
##collapsing <- unlist(st_band)
if(length(st_band) == 0){
st_band <- NULL
}
}
if(is.null(s_range)){
st_band <- NULL
}
#---------------------------------------------------------
output$origins <- spo
output$gtp <- gtp
output$start_date <- final_start_date
output$s_threshold <- sbw
#output$plot <- p
output$poly <- boundary_ppt
output$Class <- "real_spo"
output$st_sign <- st_band
output$t_bands <- colnames(res)
#output$t_list <- t_list
#output$t_sign <- t_bd
return(output)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.