Nothing
#removes values above sat_limit*power_threshold
#' Removing Saturated Data
#'
#' @description Tests for readings which may indicate saturation of the system.
#' Removes values above the power saturation limit (calculated by multiplying
#' sat_limit and power_thresh).
#'
#' @param df A dataframe containing pv data.
#' @param var_list A list of the dataframe's standard variable names, obtained from
#' the output of \code{\link{plr_variable_check}}.
#' @param sat_limit An upper limit on power saturation. This is multiplied by the
#' power threshold, and power values above this point are filtered from
#' the dataframe. The value depends on the system's inverter.
#' @param power_thresh An upper limit on power.
#'
#' @return Returns passed data frame with rows removed which contain power values above the specified threshold
#'
#' @examples
#' # build var_list
#' var_list <- plr_build_var_list(time_var = "timestamp",
#' power_var = "power",
#' irrad_var = "g_poa",
#' temp_var = "mod_temp",
#' wind_var = NA)
#' # Clean Data
#' test_dfc <- plr_cleaning(test_df, var_list, irrad_thresh = 100,
#' low_power_thresh = 0.01, high_power_cutoff = NA)
#'
#' test_dfc_removed_saturation <- plr_saturation_removal(test_dfc, var_list,
#' sat_limit = 3000, power_thresh = 0.99)
#'
#' @importFrom rlang .data
#' @export
plr_saturation_removal <- function(df, var_list, sat_limit, power_thresh = 0.99) {
df <- dplyr::filter(df, df[, var_list$power_var] <= as.numeric(as.character(sat_limit)) * power_thresh)
return(df)
}
#' Statistical k-means Test
#'
#' @description The method builds linear models by day, identifies outliers,
#' and performs 2-means clustering by slopes. If the lower identified
#' cluster is significantly less than the higher mean, and constitutes less
#' than 25\% of the data, it is identified as soiled and returned. Otherwise, the
#' outlier points are identified as soiled and returned.
#'
#' @param df A df containing pv data. Should be 'cleaned' by \code{\link{plr_cleaning}}.
#' @param var_list A list of the dataframe's standard variable names, obtained from
#' the output of \code{\link{plr_variable_check}}.
#' @param mean_ratio This scales the higher identified cluster's mean for comparison.
#' Higher values will be more likely to identify the second mean as soiled, and vice versa.
#' Values should range from 0 to 1.
#' @param plot optional; Boolean; whether to return the box plot generated by the method
#' to identify outliers.
#' @param file_path optional; location to store the boxplot if plot is set TRUE.
#' Note this is not necessary if you select to plot - only if you wish to save it.
#' @param file_name optional; name of file to save boxplot if plot is set to TRUE.
#' @param set_cutoff Defaults to FALSE; pass a numeric value to cut off all slopes
#' less than the cutoff value. This bypasses entirely the outlier and clustering
#' calculuations to remove slope values you believe to be soiled.
#'
#' @return The method returns a dataframe containing the values that should be
#' removed. If you want to discard them, try using dplyr::filter().
#'
#'
plr_kmeans_test <- function(df, var_list, mean_ratio = 0.7, plot = FALSE,
file_path, file_name, set_cutoff = FALSE) {
#adjust dates to char
df$day <- sapply(as.Date(df[, var_list$time_var]) - as.Date(df[, var_list$time_var][1]), as.character)
slopes <- NULL
rr <- NULL
for (i in unique(df$day)) {
temp <- subset(df, df$day == i) #using .data$ here creates an error
if (nrow(temp) > 3) {
mod <- lm(iacp ~ poay - 1, data = temp)
# mod <- lm(iacp ~ poay, data = temp)
slope <- as.numeric(stats::coefficients(mod)[1])
rs <- summary(mod)$adj.r.squared
slopes <- rbind(c(slope, i, as.character(as.Date(temp$tmst[1]))), slopes) #adds to top
rr <- rbind(rs, rr)
}
}
# temp <- dat %>% dplyr::group_by(day) %>% summarise(lm(iacp ~ poay - 1)$coefficients[1])
slopes <- as.data.frame(slopes, row.names = FALSE) # make dataframe
slopes$rr <- as.numeric(rr) # cast as numeric (will make non-numerics NA)
slopes <- slopes[stats::complete.cases(slopes), ] # filters out all NA's
slopes$V1 <- as.numeric(as.character(slopes$V1)) # forces numeric
if (set_cutoff != FALSE) {
slopes$set_cut <- ifelse(slopes$V1 > set_cutoff, TRUE, FALSE)
days_to_remove <- as.character(slopes$V2[which(slopes$set_cut == FALSE)])
} else {
# cluster
try <- cluster::pam(slopes$V1, k = 2)
slopes$cluster <- try$clustering
# outlier check
box <- graphics::boxplot(slopes$V1)
slopes$out <- ifelse(slopes$V1 %in% box$out, TRUE, FALSE)
box_out <- length(which(slopes$out == TRUE))
box_in <- length(which(slopes$out == FALSE))
out_days <- as.character(slopes$V2[which(slopes$out == TRUE)])
# plot(slopes$V1, col = as.factor(slopes$out))
# clust <- kmeans(slopes$V1, 2)
# test <- data.frame(x = as.numeric(as.character(slopes$V1)),
# y = slopes$cluster,
# z = as.numeric(as.character(slopes$V2)),
# rr = slopes$rr)
#
mean1 <- mean(slopes$V1[which(slopes$cluster == 1)])
mean2 <- mean(slopes$V1[which(slopes$cluster == 2)])
# plot(test$x, col = test$y)
# ggplot(test, aes(x = z, y = x)) +
# geom_point(aes(color = rr)) +
# labs(x = 'day', y = 'slope', title = paste0('Daily slope dist, site ', file))
if (plot == TRUE) {
grDevices::png(filename = paste0(file_path, file_name, ".png"))
# print(ggplot(test, aes(x = z, y = x)) +
# geom_point(aes(color = rr)) +
# labs(x = "day", y = "slope", title = paste0("Daily slope dist, site ", file)))
print(plot(slopes$V1, col = slopes$cluster, xlab = "days", ylab = "slope",
main = paste0("Daily slope dist, site ", file_name)))
grDevices::dev.off()
}
#identifying meaningful clusters
if (mean1 > mean2) {
mean_clear <- mean1
mean_soil <- mean2
clear_clust <- 1
} else {
mean_clear <- mean2
mean_soil <- mean1
clear_clust <- 2
}
clear_days <- slopes$V2[which(slopes$cluster == clear_clust)]
day_ratio <- length(clear_days) / length(unique(df$day))
soil_days <- as.character(slopes$V2[which(slopes$cluster != clear_clust)])
days_to_remove <- NULL
if (mean_ratio * mean_clear > mean_soil && day_ratio > 0.75) {
#df <- df[which(df$day %in% clear_days),]
days_to_remove <- soil_days
} else if (box_out / box_in <= 0.15) {
days_to_remove <- out_days
}
}
return(days_to_remove)
}
# seems to find days with power readings outside of correlation threshold
# and assume the system failed those days. Removes outliers. It has the option
# of calling kmeans test at one point.
#' Decompose Seasonality from Data
#'
#' @description Decomposes seasonality from a dataframe that has already
#' passed through a PLR Determination test, e.g. \code{\link{plr_xbx_model}}. This method has
#' the option of creating plot and data files.
#'
#' @param data a dataframe containing PV data that has undergone a power
#' predictive model, e.g. \code{\link{plr_xbx_model}}.
#' @param freq the frequency of seasonality. This is typically 4 but depends
#' on the location of the system.
#' @param power_var name of the power variable, e.g. iacp
#' @param time_var name of the time variable, e.g. tvar
#' @param plot boolean indicating if you wish to save a plot.
#' @param plot_file location to save the plot, if the plot param is given TRUE.
#' @param title the title of the plot created if the plot param is given TRUE.
#' @param data_file location to save data. Currently non-functional.
#'
#' @return Dataframe containing decomposed time series features
#'
#' @examples
#' #' # build var_list
#' var_list <- plr_build_var_list(time_var = "timestamp",
#' power_var = "power",
#' irrad_var = "g_poa",
#' temp_var = "mod_temp",
#' wind_var = NA)
#' # Clean Data
#' test_dfc <- plr_cleaning(test_df, var_list, irrad_thresh = 100,
#' low_power_thresh = 0.01, high_power_cutoff = NA)
#' # Perform power modeling step
#' test_xbx_wbw_res <- plr_xbx_model(test_dfc, var_list, by = "week",
#' data_cutoff = 30, predict_data = NULL)
#'
#' test_xbx_wbw_decomp <- plr_decomposition(test_xbx_wbw_res, freq = 4,
#' power_var = 'power_var', time_var = 'time_var',
#' plot = FALSE, plot_file = NULL, title = NULL,
#' data_file = NULL)
#'
#' @importFrom stlplus stlplus
#' @importFrom stats lm
#'
#' @export
plr_decomposition <- function(data, freq, power_var, time_var, plot = FALSE,
plot_file = NULL, title = NULL, data_file = NULL) {
# require(stlplus)
# require(padr)
# require(ggplot2)
# if (!("time_var" %in% colnames(data))) {
# stop("column time_var not detected. Pass the data through a PLR determining test.")
total.age <- 1:max(data[, time_var], na.rm = TRUE)
power.sum <- as.data.frame(total.age)
# tibble object does not work with preexisting logic
# convert to dataframe
data <- as.data.frame(data)
#add NA rows where there is no monthly data
#collect the power and error data
for (j in power.sum$total.age) {
if (j %in% data[, time_var]) {
power.sum$power[j] <- data[which(data[, time_var] == j), power_var]
power.sum$sigma[j] <- data$sigma[which(data[, time_var] == j)]
} else {
power.sum$power[j] <- NA
power.sum$sigma[j] <- NA
}
}
#If data is missing at the start is NA, remove it and start from the existing values
while (is.na(power.sum$power[1])) {
power.sum <- power.sum[-1, ]
}
#Same for NAs at the end of the time series
while (is.na(power.sum$power[length(power.sum$power)])) {
power.sum <- power.sum[-length(power.sum$power), ]
}
#define time series object of the predicted power, frequency is 12 months/year
ts.power <- stats::ts(power.sum$power, frequency = freq)
#stlplus to decompose with NA months
stl.model <- stlplus(x = ts.power, s.window = "periodic")
stl.data <- stl.model[[1]]
#define column showing if trend month was interpolated from an NA month
stl.data$interpolated <- ifelse(is.na(stl.data$raw), TRUE, FALSE)
stl.data$age <- power.sum$total.age[1]:utils::tail(power.sum$total.age, n = 1)
stl.data$sigma <- power.sum$sigma
stl.data$operating <- 1:length(stl.data$age)
stl.data$power <- ifelse(stl.data$interpolated == TRUE, NA, stl.data$trend)
Rd <- lm(power ~ operating, data = stl.data, weights = 1 / sigma)
if (plot == TRUE) {
grDevices::png(filename = plot_file)
#plot the trend of the predicted power, color by if the month had raw data, add sigma error bar
plot(stl.data$operating,
stl.data$trend,
col = ifelse(stl.data$interpolated == TRUE, "green", "blue"),
ylim = c(0, 1.1 * max(stl.data$trend)),
xlab = "Age (Pseudo Months)",
ylab = "Predicted Power (KW) Trend",
main = title)
graphics::arrows(stl.data$operating,
stl.data$trend - stl.data$sigma,
stl.data$operating,
stl.data$trend + stl.data$sigma,
length = 0.03,
angle = 90,
code = 3,
col = "blue")
stl.data$power <- ifelse(stl.data$interpolated == TRUE, NA, stl.data$trend)
#define power change trendline
Rd <- lm(power ~ operating, data = stl.data, weights = 1 / sigma)
graphics::abline(Rd)
grDevices::dev.off()
}
#change rate (%/yr) is defined as slope/intercept*12
Rc <- summary(Rd)$coefficients[[2]] / summary(Rd)$coefficients[[1]] * 12 * 100
return(stl.data)
#write.csv(stl.data, data_file)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.