#'
#'@title Calculate linear regressions for time series in Y as function of time series in X
#'
#'@description Function to calculate linear regressions between time series in Y as function of time series in X.
#'
#'@param mdfrX - melted dataframe with independent (X) time series
#'@param mdfrY - melted dataframe with dependent (Y) time series
#'@param yvars - vector of dependent variables to process (or NULL for all)
#'@param xlab - label associated with independent variables
#'@param ylab - label associated with dependent variables
#'@param nrows - number of rows for LR plots
#'@param coord_fixed - flag to use equal x/y unit dimensions in LR plots
#'@param labelByGroup - flag to use "group" column to organize linear regression analysis
#'@param verbose - flag to print messages
#'
#'@return list with named elements 'res', 'summary' and 'plot': \cr
#'\itemize{
#' \item res - a list with sublists by unique X with elements
#' \itemize{
#' \item lms - list with linear model (lm) results by Y for X
#' \item summary - dataframe with summaries of linear model results
#' \item plots - list of ggplot2 objects (p1=standardized time series, p2=fits to lm's)
#' }
#' \item summary - a dataframe summarizing all LR results. Columns are
#' \itemize{
#' \item ygroup - dependent variable group label
#' \item y - dependent variable label
#' \item xgroup - independent variable group label
#' \item x - independent variable label
#' \item n - number of valid data points
#' \item rho - Pearson's correlation coefficient
#' \item rsq - R-squared for linear fit
#' \item p - P-value (uncorrected for multiple comparisons)
#' }
#' \item plot - a ggplot object.
#' \item data - dataframe with data involved in linear models
#'}
#'
#'@details none.
#'
#'@import ggplot2
#'@import plyr
#'@import reshape2
#'@import wtsUtilities
#'
#'@export
#'
calcLRs.YsByX<-function(mdfrX,
mdfrY,
yvars=NULL,
xlab="index",
ylab=NULL,
nrows=2,
coord_fixed=FALSE,
labelByGroup=FALSE,
verbose=FALSE){
# require(ggplot2);
# require(plyr);
# require(reshape2);
# require(wtsUtilities);
#define objects for output
res<-list();#list of individual LR results and plot objects
sums<-NULL; #dataframe for summary of LR results
##standardize independent (X) values by variable, group across years
if (!('group' %in% names(mdfrX))) mdfrX$group<-"";#add group column
mdfrZXs<-ddply(mdfrX,.variables=c('group','variable'),.fun=standardize,col='value',.inform=TRUE);
##standardize dependent (Y) values by variable, group across years
if (!('group' %in% names(mdfrY))) mdfrY$group<-"";#add group column
mdfrZYs<-ddply(mdfrY,.variables=c('group','variable'),.fun=standardize,col='value',.inform=TRUE);
uXGs<-as.character(unique(mdfrZXs[["group"]]));
uXVs<-as.character(unique(mdfrZXs[["variable"]]));
uYGs<-as.character(unique(mdfrZYs[["group"]]));
uYVs<-as.character(unique(mdfrZYs[["variable"]]));
if (is.null(yvars)) yvars<-uYVs;
for (uXV in uXVs){
if (verbose) cat("Processing independent variable ",uXV,"\n");
mdfrYsByX<-NULL;
for (uYG in uYGs){
tmp<-mdfrZXs[mdfrZXs$variable==uXV,];
tmp$group<-uYG;
tmp$linetype<-uXV;
mdfrYsByX<-rbind(mdfrYsByX,tmp);
}
tmp<-mdfrZYs;
tmp$linetype<-ylab;
mdfrYsByX<-rbind(mdfrYsByX,tmp);
##plot zscores for uYVs and uXV by year (x axis) and group (plot)
ncol<-ceiling(length(uYVs)/8);
p1 <- ggplot(mdfrYsByX,aes_string(x='year',y='value',
colour='variable',linetype="linetype"));
p1 <- p1 + geom_line(size=1.25);
p1 <- p1 + scale_linetype_manual(values=c('dotted','solid'),breaks=c(uXV,ylab),limits=c(uXV,ylab))
# p1 <- p1 + scale_colour_discrete(aesthetics="value",scale_name="colour",breaks=c(uXV,uYVs))
p1 <- p1 + ylab('z-score');
p1 <- p1 + facet_grid(group~.)
p1 <- p1 + guides(colour=guide_legend(title='',order=2,ncol=ncol),
linetype=guide_legend(title='',order=1));
#print(p1);
lm.vars<-list();
sum.vars<-NULL;
##process by unique values of "group"
for (uYG in uYGs){
if (verbose) cat("--Processing Y group ",uYG,'\n');
dfrYsOnX<-dcast(mdfrYsByX[mdfrYsByX$group==uYG,],year~variable,fun.aggregate=sum,value.var='value');
##linear model analysis
for (yvar in yvars){
if (verbose) cat("----Assessing dependent variable ",yvar,'\n');
if (!is.null(dfrYsOnX[[uXV]])&&!is.null(dfrYsOnX[[yvar]])){
tst<-sum(abs(dfrYsOnX[[uXV]]),na.rm=TRUE)*sum(abs(dfrYsOnX[[yvar]]),na.rm=TRUE);
if(!is.na(tst)&&(tst>0)){
gv<-yvar;
if (labelByGroup) gv<-paste(uYG,gv);
if (verbose) cat("------Processing dependent group/variable ",gv,'\n');
lm.vars[[gv]]<-lm(as.formula(paste("`",yvar,"`~\`",uXV,"`",sep='')),dfrYsOnX);
s<-summary(lm.vars[[gv]]);
sum.vars<-rbind(sum.vars,data.frame(ygroup=uYG,
y=yvar,
n=sum((!is.na(dfrYsOnX[[uXV]]))&(!is.na(dfrYsOnX[[yvar]]))),
rho=s$coefficients[2,1],
rsq=s$r.squared,
p=s$coefficients[2,4]));
}
}
}#yvars loop
##if (verbose) cat(uYG,'\n');
}#uYGs loop
##plot the linear fits
tmp<-mdfrZYs[mdfrZYs$variable %in% yvars,];
if (!labelByGroup) tmp$group<-'';
dY<-dcast(tmp,year~group+variable,value.var='value');
mY<-melt(dY,id.vars='year');
mYX<-rbind(mdfrZXs[mdfrZXs$variable==uXV,c('year','variable','value')],mY)
dYX<-dcast(mYX,year~variable,value.var='value');
mYonX<-melt(dYX,id.vars=c('year',uXV));
mYonX$variable<-gsub("_"," ",mYonX$variable,fixed=TRUE);
p2 <- ggplot(mYonX,aes_string(y='value',x=paste0('`',uXV,'`')));
p2 <- p2 + geom_point(size=1.25);
p2 <- p2 + stat_smooth(method="lm",formula=y~x);
if (coord_fixed) p2 <- p2 + coord_fixed();
p2 <- p2 + xlab(paste0('z-score(',uXV, ')'));
p2 <- p2 + ylab(paste0('z-score(',ylab,')'));
p2 <- p2 + facet_wrap(~variable,nrow=nrows);
#print(p2);
res[[uXV]]<-list(lms=lm.vars,summary=sum.vars,plots=list(p1=p1,p2=p2))
sum.vars$xgroup<-'';
sum.vars$x <-uXV;
sums<-rbind(sums,sum.vars[,c("ygroup","y","xgroup","x","n","rho","rsq","p")]);
}##uXVs
##plot the linear fits on one page
mYonXp<-NULL;
for (uXV in uXVs){
tmp<-mdfrZYs[mdfrZYs$variable %in% yvars,];
if (!labelByGroup) tmp$group<-'';
dY<-dcast(tmp,year~group+variable,value.var='value');
mY<-melt(dY,id.vars='year');
mYX<-rbind(mdfrZXs[mdfrZXs$variable==uXV,c('year','variable','value')],mY)
dYX<-dcast(mYX,year~variable,value.var='value');
mYonX<-melt(dYX,id.vars=c('year',uXV));
mYonX$variable<-gsub("_"," ",mYonX$variable,fixed=TRUE);
mYonX$xvar<-uXV;
names(mYonX)<-c("year","x","yvar","y","xvar");
mYonXp<-rbind(mYonXp,mYonX[,c("year","yvar","y","xvar","x")]);
}
p3 <- ggplot(mYonXp,aes_string(y="y",x="x"));
p3 <- p3 + geom_point(size=1.25);
p3 <- p3 + stat_smooth(method="lm",formula=y~x);
if (coord_fixed) p3 <- p3 + coord_fixed();
p3 <- p3 + xlab(paste0('z-score(',xlab,')'));
p3 <- p3 + ylab(paste0('z-score(',ylab,')'));
p3 <- p3 + facet_grid(yvar~xvar);
#print(p3);
##create a dataframe for the data to the linear fits, but without using "group"
mYonXp<-NULL;
for (uXV in uXVs){
tmp<-mdfrZYs[mdfrZYs$variable %in% yvars,];
dY<-dcast(tmp,year~variable,value.var='value');
mY<-melt(dY,id.vars='year');
mYX<-rbind(mdfrZXs[mdfrZXs$variable==uXV,c('year','variable','value')],mY)
dYX<-dcast(mYX,year~variable,value.var='value');
mYonX<-melt(dYX,id.vars=c('year',uXV));
mYonX$variable<-gsub("_"," ",mYonX$variable,fixed=TRUE);
mYonX$xvar<-uXV;
names(mYonX)<-c("year","x","yvar","y","xvar");
mYonXp<-rbind(mYonXp,mYonX[,c("year","yvar","y","xvar","x")]);
}
return(invisible(list(res=res,summary=sums,plot=p3,data=mYonXp)));
}
# mdfrI<-mdfrEIs; ylab<-'Index'; vars<-c('AO','PDO','MEI','lagged MEI');
# #mdfrI<-mdfrCSFs; ylab<-'Cross-shelf flow';vars<-c('east','central','west');
# lst<-calcLinRegs.YsByX(mdfrEIs,mdfrFbySA,xlab='index',ylab='FbySA',yvars=formatZeros(1:11));
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.