Nothing
#' @import utils
## quiets concerns of R CMD check re: the .'s that appear in pipelines
#if(getRversion() >= "2.15.1") utils::globalVariables(c(".",">"))
if(getRversion() >= "2.15.1") utils::globalVariables(c("."))
#' Drawing a QQplot
#'
#' The function MakeQQplot displays the QQplot of the -log10(pvalues).
#' @param Pvalues A vector containing pvalues.
#' @param Name A name of the corresponding test. (optional)
#' @param Xrange A range for the x axis. (optional)
#' @param Yrange A range for the y axis. (optional)
MakeQQplot <- function(Pvalues,Name=NULL,Xrange=NULL,Yrange=NULL){
n=sum(!is.na(Pvalues))
mLog10Pval <- Pvalues %>%
sort %>%
log10 %>%
`*`(-1)
mLog10Pval.H0 <- -log10((1:n)/n)
if(is.null(Xrange)){
Xrange=range(mLog10Pval.H0)
}
if(is.null(Yrange)){
Xrange=range(mLog10Pval)
}
plot(mLog10Pval.H0,mLog10Pval,
xlab='Expected -log10(pval)',ylab='Observed -log10(pval)',main=Name,
xlim=Xrange,ylim=Yrange,
type='l',lwd=2.5,cex.axis=1.5,cex.main=2)
abline(0,1,col=2,lwd=2.5)
lines(-log10((1:n)/n),mLog10Pval,
lwd=2.5)
}
#' Display visual checks of pvalues.
#'
#' The function metaGE.pvalplot displays the pvalue distribution and the QQplot of the -log10(pvalues).
#' @param Pvalues A vector containing pvalues.
#' @param Main The main to display.(optional)
#' @return No return value, the plot is displayed in the active graphics window.
#' @export
#' @examples
#' # Import the data
#' data("metaData")
#'
#' # Compute the inter-environment correlation matrix
#' matCorr <- metaGE.cor(metaData, Threshold = 0.8)
#'
#' # Fit the Fixed Effect model
#' FeDF <- metaGE.fit(metaData, matCorr, Method = "Fe")
#'
#' # Check the pvalues
#' metaGE.pvalplot(Pvalues = FeDF$PVALUE, Main= "Pvalue Fe")
metaGE.pvalplot <- function(Pvalues, Main=''){
# #Save the user graphical parameters
# oldpar <- par(no.readonly = TRUE)
# on.exit(par(oldpar))
if(!is.numeric(Pvalues)){stop(paste0(Pvalues,' must be a numeric vector.'))}
# par(mfrow=c(1,2))
hist(Pvalues,100,main= Main,xlab='Pvalues', cex.axis=1.5,cex.main=2)
MakeQQplot(Pvalues,Main)
}
#' Draw the heatmap to see markers effects across environments.
#'
#' The function metaGE.heatmap displays the heatmap of the zscores, the estimated marker effects or the pvalues of each markers (in rows) in each environments (in columns).
#' @param Data A dataset containing the zscores, the effects or the pvalues of each marker (in rows) in each environment (in columns), as obtained from [metaGE.fit()].
#' @param Prefix The prefix of the score to display in the heatmap: "\code{Z.}" for the zscores, "\code{EFFECT.}" for the effects and "\code{PVAL.}" for the pvalues.("\code{Z.}" by default)
#' @param EnvGroups A dataset containing the names of the environments (in the first column) and the groups to which the environments belong (in the second column). (optional)
#' @param QTLsVarName The name of the column indicating to which QTL the marker belongs. (optional)
#' @param RowOrder A boolean specifying whether to reorder the markers or not. (\code{TRUE} by default)
#' @param ColOrder A boolean specifying whether to reorder the environments or not. (\code{TRUE} by default)
#' @param ShowDendrogram A boolean specifying whether to show the clustering of the rows and/or the columns. (\code{FALSE} by default)
#' @param Colors A vector of three colors corresponding to the color scale of the Heatmap.(optional)
#' @param Main The main to display.(optional)
#' @return The heatmap
#' @export
#' @importFrom gplots heatmap.2
#' @importFrom grDevices colorRampPalette
#' @examples
#' require(dplyr)
#' # Import the data
#' data("metaData")
#'
#' # Compute the inter-environment correlation matrix
#' matCorr <- metaGE.cor(metaData, Threshold = 0.8)
#'
#' # Fit the Fixed Effect model
#' FeDF <- metaGE.fit(metaData, matCorr, Method = "Fe")
#'
#' # Control the FDR (here Benjamini-Hochberg)
#' Alpha <- 0.05
#' Signif <- FeDF$PVALUE %>% p.adjust(method = "BH") %>% `<`(Alpha) %>% which
#'
#' # Draw the z-scores heatmap of the significant markers
#'heatmap <- metaGE.heatmap(Data = FeDF[Signif,],
#' Prefix = "Z.")
#'
metaGE.heatmap <- function(Data, Prefix='Z.', EnvGroups=NULL, QTLsVarName=NULL,RowOrder=TRUE, ColOrder = TRUE,ShowDendrogram=FALSE, Colors=c("red","black","green"),Main=""){
## Checkings
### Checkings on Data
if((!is_tibble(Data))&(!is.data.frame(Data))){
stop('Data should be either a data.frame or a tibble')
}
### Checkings on Prefix
NbScoreCols <- names(Data) %>%
str_detect(pattern = Prefix) %>%
sum
if(NbScoreCols==0){
stop(paste0('Could not find columns with prefix',Prefix,' to make the plot'))
}else{
ColNames <- names(Data) %>%
.[str_which(.,pattern = Prefix)]
}
### Checkings on Groups
if(!is.null(EnvGroups)){
if(!is.data.frame(EnvGroups)){
stop('EnvGroups should be a dataframe.')
}else{
EnvGroups <- EnvGroups %>% as.matrix()
EnvInGroups <- map(unique(EnvGroups[,2]) ,~EnvGroups[which(EnvGroups[,2]==.x),1])
ColNames.sel <- EnvInGroups %>%
reduce(c) %>%
paste0(Prefix,.) %>%
intersect(.,ColNames)
NbEnvPerGroups <- map_int(EnvInGroups,length) %>%
cumsum
ColOrder <- FALSE
if(is_empty(ColNames.sel)){
stop('Environments in EnvGroups do not match with column names in Data.')
}
# ColLabel <- rep("",sum(NbEnvPerGroups))
# ColLabel[cumsum(NbEnvPerGroups)%/%2]<- unique(EnvGroups[,2])
}
} else {
ColNames.sel <- ColNames
EnvInGroups <- NULL
NbEnvPerGroups <- c(length(ColNames.sel))
}
### Checkings on QTLs
if(!is.null(QTLsVarName)){
if(!(QTLsVarName %in% colnames(Data))){
stop(paste0(QTLsVarName,' is not a column in Data.'))
}else {
### Ordering the markers
Data <-Data %>% arrange(QTLsVarName)
QTLs <- Data %>% pull(QTLsVarName)
QTLs_names <- unique(QTLs) %>% as.character()
RowSplit <- map_int(QTLs_names,~length(which(QTLs==.x))) %>%
cumsum
RowOrder <- FALSE
Rowlabel <- rep("",sum(RowSplit))
Rowlabel[cumsum(RowSplit)%/%2]<-paste0("QTL_",QTLs_names)
}
}else{
### Ordering the markers
Data <-Data %>% arrange(.data$CHR, .data$POS)
QTLs_names <- character(0)
RowSplit <- NULL
Rowlabel <- rep("",nrow(Data))
}
## Select columns if needed
Dataheat <- Data %>% select(one_of(ColNames.sel))
## Build the HmMatrix
HmMatrix <- Dataheat %>% as.matrix
colnames(HmMatrix) <- colnames(HmMatrix) %>% str_remove_all(pattern = Prefix)
if(!ShowDendrogram | (!RowOrder & !ColOrder)){
dendrogram.plot <- "none"
}else if(RowOrder & ColOrder){
dendrogram.plot <- "both"
}else if(RowOrder){
dendrogram.plot <- "row"
}else{
dendrogram.plot <- "column"
}
## At least 2 markers ?
if(nrow(HmMatrix)<2){
message('At least 2 rows required to draw a heatmap')
}else{
Col.pal <- grDevices::colorRampPalette(Colors)(100)
class(HmMatrix) <- 'numeric'
HM <-suppressWarnings( gplots::heatmap.2(HmMatrix,
col = Col.pal,
Rowv = RowOrder,
Colv = ColOrder,
dendrogram = dendrogram.plot,
sepwidth = c(0.05, 0.3),
trace = 'none',
labRow = Rowlabel,
main = Main,
colsep = NbEnvPerGroups,
rowsep = RowSplit,
density.info = "none",key.title = NA,margins = c(5,7) ))
return(HM)
}
}
#' Draw the Manhattan plot.
#'
#' The function metaGE.manhattan displays the Manhattan plot of the -log10(p-value) or the local score of each marker along the genome.
#' @param Data A dataset containing the columns: CHR, POS, MARKER and the variable to plot for each marker, as obtained from [metaGE.fit()].
#' @param VarName The name of the column containing the variable to plot, generally the p-value or a score.
#' @param Threshold A threshold in order to draw a "genome-wide significant" line. (optional)
#' @param SigZones A dataset containing the significant zones to plot, as obtained from [metaGE.lscore()]. Must have columns: CHR, Start, End. (optional)
#' @param Score A boolean. If \code{FALSE}, the -log10 of the variable is plotted, useful for plotting p-values. If \code{TRUE}, the raw values of the variable is plotted, useful for plotting scores. (\code{FALSE} by default)
#' @param AnnotateMarkers A list of markers name to annotate in the plot. (optional)
#' @param Main The main to display. (optional)
#' @param col A character vector indicating which colors to alternate for different chromosomes. (optional)
#' @param colSigZones A character indicating which color to plot the significant zones.("\code{blue}" by default)
#' @param Ylim Two numeric values, specifying the lower limit and the upper limit of the y-axe scale. (optional)
#' @return The Manhattan plot.
#' @export
#' @importFrom qqman manhattan
#' @importFrom ggrepel geom_label_repel
#' @examples
#' require(dplyr)
#' # Import the data
#' data("metaData")
#'
#' # Compute the inter-environment correlation matrix
#' matCorr <- metaGE.cor(metaData, Threshold = 0.8)
#'
#' # Fit the Fixed Effect model
#' FeDF <- metaGE.fit(metaData, matCorr, Method = "Fe")
#'
#' # Control the FDR (here Benjamini-Hochberg)
#' Alpha <- 0.05
#' Signif <- FeDF$PVALUE %>% p.adjust(method = "BH") %>% `<`(Alpha) %>% which
#'
#' # Draw the corresponding manhattan plot
#' PvalThresholdFe <- FeDF[Signif,]$PVALUE%>% max %>% max(.,0)
#' manhattan_pval <- metaGE.manhattan(Data = FeDF,VarName = 'PVALUE',
#' Threshold = PvalThresholdFe,
#' Main = '-log10(Pval) alongside the chromosome Fe method')
#'
#'
#' # Compute the score local
#' xi <- 2
#' FeScore <- metaGE.lscore(FeDF,"PVALUE", xi)
#'
#' # Draw the corresponding manhattan plot
#' manhattan_lscore <- metaGE.manhattan(Data = FeScore$Data,VarName = 'SCORE',
#' SigZones = FeScore$SigZones, Score = TRUE,
#' Main = 'Local score alongside the chromosome Fe method')
metaGE.manhattan <- function(Data, VarName, Threshold=NULL,SigZones=NULL,Score = FALSE, AnnotateMarkers=NULL, Main='', col=c("grey", "black"), colSigZones='blue', Ylim=NULL){
### Calculate cumulative position for all markers
don <- Data %>%
# Compute chromosome size
group_by(.data$CHR) %>%
summarise(chr_len=max(.data$POS)-min(.data$POS),
chr_min = min(.data$POS)) %>%
# Calculate cumulative position of each chromosome
mutate(tot=cumsum(as.numeric(.data$chr_len))-.data$chr_len) %>%
select(-.data$chr_len) %>%
# Add this info to the initial dataset
left_join(Data, ., by=c("CHR"="CHR")) %>%
# Add a cumulative position of each SNP
arrange(.data$CHR, .data$POS) %>%
mutate( POScum=.data$POS+.data$tot - .data$chr_min + 1)
nbCHR <- length(unique(Data$CHR))
if(!is.null(AnnotateMarkers)){
AnnotateMarker_info <- don %>% filter(.data$MARKER %in% AnnotateMarkers) %>% select(.data$MARKER, .data$POScum)
}
if(!is.null(SigZones)){
### Calculate cumulative position for significant zones
sigzones <- Data %>%
# Compute chromosome size
group_by(.data$CHR) %>%
summarise(chr_len=max(.data$POS)-min(.data$POS),
chr_min = min(.data$POS)) %>%
# Calculate cumulative position of each chromosome
mutate(tot=cumsum(as.numeric(.data$chr_len))-.data$chr_len) %>%
select(-.data$chr_len) %>%
# Add this info to the initial dataset
left_join(SigZones, ., by="CHR") %>%
# Add a cumulative position of each SNP
arrange(.data$CHR, .data$Start) %>%
mutate( Startcum=.data$Start+.data$tot - .data$chr_min + 1,
Endcum=.data$End+.data$tot- .data$chr_min +1)
}
### Compute the legend of the x axis
axisdf <- don %>% group_by(.data$CHR) %>% summarize(center=(max(.data$POScum) + min(.data$POScum)) / 2 )
### Rename the variable to plot
don <- don %>% select(.data$CHR,.data$POS, .data$POScum, all_of(VarName)) %>% rename("VarToPlot"=all_of(VarName))
if(!Score){
### Built the plot
manhattan <- ggplot() +
# Show all points
geom_point(data=don, aes(x=.data$POScum, y=-log10(.data$VarToPlot),color=as.factor(.data$CHR)), alpha=0.8, size=2) +
scale_color_manual(values = rep_len(col, nbCHR)) +
# custom X axis:
scale_x_continuous( labels = axisdf$CHR, breaks= axisdf$center ) + xlab("Chromosome") +
# remove space between plot area and x axis
ylab(paste0("-log10(",VarName,")")) +
# Custom the theme:
theme_bw() +
theme(
legend.position="none",
panel.border = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank()
)
# Add vertical lines to annotate markers
if(!is.null(AnnotateMarkers)){
manhattan <- manhattan + geom_segment(data = AnnotateMarker_info,aes(x = .data$POScum, xend=.data$POScum,y=min(-log10(don$VarToPlot)), yend=Inf),
linewidth=0.8,color= yarrr::transparent("red2", trans.val = .3),linetype=2)
}
# Add a threshold
if(!is.null(Threshold)){
manhattan <- manhattan + geom_hline(yintercept=-log10(Threshold), color = "red2",linewidth=1.5)
}
# Add limit to the y axes
if(is.null(Ylim)){
Ylim <- c(min(-log10(don$VarToPlot)), max(-log10(don$VarToPlot)))
}
# Add the sigzones
if(!is.null(SigZones)){
# Compute chromosome size
width_box <- Data %>% group_by(.data$CHR) %>%
summarise(chr_len=max(.data$POS)-min(.data$POS)) %>%
pull(.data$chr_len) %>% mean() * 0.02
for (i in 1:nrow(sigzones)){
manhattan <- manhattan + geom_rect(data=data.frame(xmin=max(sigzones$Startcum[i]-width_box,0),
xmax=min(sigzones$Endcum[i]+width_box,max(don %>% filter(.data$CHR==sigzones$CHR[i]) %>% pull(.data$POScum))),
ymin=max(0,Ylim[1]),
ymax=min(-log10(sigzones$PvalMin[i]),Ylim[2])),
aes(xmin=.data$xmin,xmax=.data$xmax,ymin=.data$ymin,ymax=.data$ymax),
fill=colSigZones,alpha=0.3)
}
}
}else{
### Built the plot
manhattan <- ggplot() +
# Show all points
geom_point(data=don, aes(x=.data$POScum, y=.data$VarToPlot,color=as.factor(.data$CHR)), alpha=0.8, size=2) +
scale_color_manual(values = rep_len(col, nbCHR )) +
# custom X axis:
scale_x_continuous( labels = axisdf$CHR, breaks= axisdf$center ) + xlab("Chromosome") +
# remove space between plot area and x axis
ylab(VarName) +
# Custom the theme:
theme_bw() +
theme(
legend.position="none",
panel.border = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank()
)
# Add vertical lines to annotate markers
if(!is.null(AnnotateMarkers)){
manhattan <- manhattan + geom_segment(data = AnnotateMarker_info,aes(x = .data$POScum, xend=.data$POScum,y=min(don$VarToPlot), yend=Inf),
linewidth=0.8,color= yarrr::transparent("red2", trans.val = .3),linetype=2)
}
# Add a threshold
if(!is.null(Threshold)){
manhattan <- manhattan + geom_hline(yintercept=Threshold, color = "red2",linewidth=1.5)
}
# Add limit to the y axes
if(is.null(Ylim)){
Ylim <- c(min(don$VarToPlot), max(don$VarToPlot))
}
if(!is.null(SigZones)){
# Compute chromosome size
width_box <- Data %>% group_by(.data$CHR) %>%
summarise(chr_len=max(.data$POS)-min(.data$POS)) %>%
pull(.data$chr_len) %>% mean() * 0.02
for (i in 1:nrow(sigzones)){
manhattan <- manhattan + geom_rect(data=data.frame(xmin=max(sigzones$Startcum[i]-width_box,0),
xmax=min(sigzones$Endcum[i]+width_box,max(don %>% filter(.data$CHR==sigzones$CHR[i]) %>% pull(.data$POScum))),
ymin=max(0,Ylim[1]),
ymax=min(sigzones$LocalScoreMax[i],Ylim[2])),
aes(xmin=.data$xmin,xmax=.data$xmax,ymin=.data$ymin,ymax=.data$ymax),
fill=colSigZones,alpha=0.3)
}
}
}
# Coef <- 2
# TH <- theme(axis.text=element_text(size=10*Coef),
# axis.title=element_text(size=15*Coef),
# plot.title=element_text(size=15*Coef),
# strip.text.x = element_text(size = 20*Coef),
# legend.text=element_text(size=12*Coef),
# legend.title = element_text(size=15*Coef))
return(manhattan+ ylim(Ylim)+ggtitle(Main))
}
#' Plot the z-score of a marker according to a covariate.
#'
#' The function metaGE.regplot displays the graph of the z-scores of a marker according to a covariate.
#' @param Data A dataset containing the columns: MARKER and the z-scores or the effects of each marker (in rows) in each environment (in columns), as obtained from [metaGE.collect()].
#' @param Covariate A dataset containing the values of one or more covariates (in columns) in each environment (in rows).
#' @param EnvName The name of the column containing the names of the environment in the \code{Covariate} dataset.
#' @param MarkerName The name of the marker.
#' @param VarName The name of the column containing the covariable to plot.
#' @param Zscore A boolean. If \code{FALSE}, the estimated marker effects is plotted. If \code{TRUE}, the z-scores of the marker is plotted. (\code{FALSE} by default)
#' @param aesCol The name of the column in the \code{Covariate} dataset containing a qualitative covariable to specify the color of the points. (optional)
#' @param Main The main to display.(optional)
#' @return The plot
#' @export
#' @import ggplot2 viridis
#' @importFrom ggrepel geom_text_repel
#' @examples
#' data("metaData")
#' data("envDesc")
#' metaGE.regplot(Data = metaData, Covariate = envDesc, EnvName = "ShortName",
#' MarkerName = "AX-91369217", VarName = "Tnight.mean", aesCol = "Classification")
#'
metaGE.regplot <- function(Data, Covariate, EnvName, MarkerName, VarName, Zscore=FALSE, aesCol=NULL, Main=''){
#Rename the variable
Covariate <- Covariate %>% rename("ENV" = all_of(EnvName))
Covariate <- Covariate %>% rename("COVARIABLE" = all_of(VarName))
if(!is.null(aesCol)){ Covariate <- Covariate %>% rename("AesCol" = all_of(aesCol))}
#Selection of the marker
SNP_df <- Data %>% filter(.data$MARKER == MarkerName)
#### Jointure with the covariate
if(!Zscore){
SNP_df <- SNP_df %>% select(.data$MARKER, contains("EFFECT.")) %>% rename_with(~str_remove(.x,pattern = 'EFFECT.'))
}
if(Zscore){
SNP_df <- SNP_df %>% select(.data$MARKER, contains("Z.")) %>% rename_with(~str_remove(.x,pattern = 'Z.'))
}
#Reshape in long
SNP_df <- SNP_df %>% gather(key = "ENV", value = "EFFECT", -.data$MARKER)
#Add to snp effect table
SNP_df <- SNP_df %>% left_join(Covariate, ., by=c("ENV"="ENV"))
#### Plot
if(!is.null(aesCol)){
plott <- ggplot(SNP_df, aes(x = .data$COVARIABLE, y = .data$EFFECT, color= factor(.data$AesCol))) +
scale_color_viridis(discrete = TRUE, option = "D")+
labs(color = aesCol)
}else{
plott <- ggplot(SNP_df, aes(x = .data$COVARIABLE, y = .data$EFFECT))
}
plott <- plott +
geom_point(size = 5) +
geom_hline(yintercept = 0 , linetype = 2) +
ggrepel::geom_text_repel(aes(label = .data$ENV), size=7,colour="black")+
xlab(VarName) + ylab("Marker effect") +
ggtitle(Main)
if(Zscore){plott <- plott + ylab("Z-score") }
return(plott)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.