Nothing
## Population Allele Locating Mapmaker v1 16APR2020
#'Population Allele Locating Mapmaker
#'
#'Produces a frequency heatmap for a specified amino-acid motif, based on the allele frequency data in the Solberg dataset.
#'
#'@param filename The filename of the local copy of the Solberg dataset - the defaulted filename is the solberg_dataset in the SSHAARP package.
#'@param motif An amino acid motif in the following format: Locus*##$~##$~##$, where ## identifies a peptide position, and $ identifies an amino acid residue. Motifs can include any number of amino acids.
#'@param color A logical parameter that identifies if the heat maps should be made in color (TRUE) or gray scale (FALSE). The default option is TRUE.
#'@param filterMigrant A logical parameter that determines if admixed populations (OTH) and migrant populations (i.e. any complexities with the 'mig') should be excluded from the dataset. The default option is TRUE.
#'@param direct The directory into which the map produced is written. The default directory is the user's working directory.
#'
#'@importFrom data.table rbindlist
#'@importFrom gmt gmt.system r2gmt
#'@importFrom DescTools RoundTo
#'@importFrom utils sessionInfo
#'@importFrom dplyr filter %>%
#'@importFrom stringr str_extract_all
#'@importFrom filesstrings file.move
#'
#'@note The produced frequency heatmap is generated by using the Generic Mapping Tools (GMT) R Package, which is an interface between R and the GMT Map-Making software.
#'@note The Solberg dataset is the tab-delimited ‘1-locus-alleles.dat’ text file in the results.zip archive at http://pypop.org/popdata/.
#'@note The Solberg dataset is also prepackaged into SSHAARP as 'solberg_dataset'.
#'@note While the map legend identifies the highest frequency value, values in this range may not be represented on the map due to frequency averaging over neighboring populations.
#'@export
#'
#'@return The specified motif and the directory into which the heatmap was written are returned in an invisible character vector. If the user enters a motif that is not found in the Solberg dataset, or that does not exist, a warning message is returned. If an incorrectly formatted motif is entered, or the user does not have the GMT software installed on their operating system, a vector with a warning message is returned. The produced heatmap is written to the user's specified directory (default is user's working directory) as a .jpg file, where the filename is "'motif'.jpg".
#'
#'@examples
#'#example to produce a color frequency heat map where migrant populations are filtered out
#'PALM("DRB1*26F~28E~30Y",filename = solberg_dataset[85:100,], filterMigrant=TRUE)
#'#example to produce a greyscale heat map where migrant populations are not filtered out
#'PALM("DRB1*26F~28E~30Y", filename = solberg_dataset[85:100,], color=FALSE, filterMigrant=FALSE)
#'
#'@references Solberg et.al. (2008) <doi: 10.1016/j.humimm.2008.05.001>
#'
#'
PALM<-function(motif, filename=SSHAARP::solberg_dataset, direct=getwd(), color=TRUE, filterMigrant=TRUE){
wd <- getwd()
on.exit(setwd(wd))
#checks if the directory user entered is valid
if(dir.exists(direct)==FALSE){
return(warning("The directory you specified does not exist. Please enter a valid directory to write your map to."))
}
OS<-strsplit(sessionInfo()$running, " ")[[1]][[1]]
#uses dataSubset to read and manipulate the Solberg dataset
solberg_DS<-suppressWarnings(dataSubset(motif, filename))
#if output of solberg_DS after dataSubset is not a dataframe, it is an error
#return solberg_DS, which contains the error message
if(is.data.frame(solberg_DS)==FALSE){
return(warning(solberg_DS))
}
#filters out migrant populations if filter_migrant==TRUE
if(filterMigrant==TRUE){
#filters out admixed, migrant OTH populations
solberg_DS<-solberg_DS[solberg_DS$contin!="OTH",]
#filters out migrant populations in complexity column
solberg_DS<-solberg_DS[which((grepl("mig", solberg_DS$complex))==FALSE),]}
if(filterMigrant==FALSE){
solberg_DS<-solberg_DS}
fMresults<-suppressWarnings(findMotif(motif))
if(is.vector(fMresults)==TRUE){
return(warning(fMresults))
}
else{
#makes an empty list named unique_AWM, where the name of each element is after a unique AWM,
#which is acquired by using the motif_finder function
unique_AWM<-sapply(unique(fMresults$trimmed_allele), function(x) NULL)}
if(length(unique_AWM)==0){
return(warning(paste(motif, "No alleles possess this motif", sep =" : ")))
}
#finds unique_AWMs in Solberg dataset and extracts the allele frequency and locus_allele column
for(y in 1:length(unique_AWM)){
unique_AWM[[y]]<-solberg_DS[,c(2,3,4,5,6,11,14)][solberg_DS$locus_allele %in% names(unique_AWM[y]),]}
#subsets out locus_allele pairs with the motif from the alignment but aren't present in the Solberg ds
unique_AWM<-unique_AWM[sapply(unique_AWM, nrow)>0]
if(length(unique_AWM)==0){
return(warning(paste(motif, "No alleles possess this motif", sep =" : ")))
}
#melts dataframes in list into one big dataframe
unique_AWM<-rbindlist(unique_AWM)
#reorders dataframe based on popname alphabetical order
unique_AWM<-unique_AWM[order(unique_AWM$popname),]
#makes row names NULL so they are back in sequential enumeration
row.names(unique_AWM)<-NULL
#creates a variable named Population Allele Frequencies (PAF), where each element is named after a
#unique popname for the targeted locus
#makes each element a list to take in allele frequencies in the next for loop
PAF<-sapply(unique(solberg_DS$popname), function(x) NULL)
#for loop for finding allele frequencies for each named element in PAF
#if multiple entries are present for a given population name, the allele frequencies are added up
#if the population does not have the motif, the allele frequency is 0, since the entry is not found in unique_AWM
for(i in 1:length(PAF)){
PAF[[i]]<-sum(as.numeric(unique_AWM$allele.freq[which((names(PAF[i])==unique_AWM$popname))]))}
#melts PAF into a two columned df
PAF<-data.frame(allele_freq = unlist(PAF),
popname = rep(names(PAF), sapply(PAF, length)), row.names = NULL, stringsAsFactors = F)
#merges PAF with solberg_DS data to a new variable, tbm_ds
#which contains summed up allele frequencies, and relevant contin, complex, locus*allele, and coordinate information
tbm_ds<-merge(PAF, solberg_DS[!duplicated(solberg_DS$popname),], by="popname")[,c("popname","contin", "complex", "latit", "longit", "allele_freq")]
#specifies certain rows from motif_map_df to go into a new variable, gmt_converted_data
gmt_converted_data<-tbm_ds[,c(1,4,5,6)]
#rerranges format
gmt_converted_data<-gmt_converted_data[,c("longit", "latit", "allele_freq", "popname")]
#if max allele frequency has more than 3 leading 0's after the decimal point, return message that map cannot be made
if((length(str_extract_all(strsplit(as.character(max(gmt_converted_data$allele_freq)), ".", fixed = T)[[1]][[2]], "0")[[1]])>=3) & (strsplit(as.character(max(gmt_converted_data$allele_freq)), ".", fixed = T)[[1]][[1]]==0)){
#obtain max allele information to see if it is large enough to produce an accurate map for
max_allele_info<-gmt_converted_data %>% filter(gmt_converted_data$allele_freq==max(gmt_converted_data$allele_freq))
#convert latit and longit back to
ifelse(grepl("-", max_allele_info$longit)==TRUE, max_allele_info$longit<-paste(max_allele_info$longit, "W", sep=""), max_allele_info$longit<-paste(max_allele_info$longit, "E", sep=""))
max_allele_info$longit<-gsub("-", "", max_allele_info$longit)
ifelse(grepl("-", max_allele_info$latit)==TRUE, max_allele_info$latit<-paste(max_allele_info$latit, "S", sep=""), max_allele_info$latit<-paste(max_allele_info$latit, "N", sep=""))
max_allele_info$latit<-gsub("-", "", max_allele_info$latit)
return(paste(paste(paste("The motif you entered has a maximum allele frequency of", max_allele_info$allele_freq), "which is too low of a frequency to produce an accurate map for.", sep=", "), "There are/is", nrow(gmt_converted_data), "population/populations at or below this frequency. The most frequent is found at", max_allele_info$longit, max_allele_info$latit,"."))
}
#set wd to tempdir() to store output files
setwd(tempdir())
#Windows specific arguments on extracting motif and allele frequency information
if(OS=="Windows"){
max_allelefreq<-max(sort(gmt_converted_data$allele_freq))
#calls function to convert R object gmt_converted_data to GMT formatted data
#outputs converted data into local environment
write(max_allelefreq, "upperbound")
#changes asterisk to '-' in filename since *s are forbidden in Windows file names
write(gsub("\\*", "-", motif), "filename")
}
#writes motif to working directory so bash script can extract
#and use it as the title for the map
write(motif, "motif")
#calls function to convert R object gmt_converted_data to GMT formatted data
#outputs converted data into local environment
r2gmt(gmt_converted_data, "motif.xyz")
#checks if user's machine has GMT software installed
#if not, an error message is returned and the function stops
gmtOut<-tryCatch({gmt.system("gmt blockmean motif.xyz -R-180/180/-60/80 -I3 > motif.block")}, error=function(cond){gmtOut<<-"Error"})
if(length(gmtOut)==1){
return("The GMT software is not installed at the level of your operating system. Please install GMT from http://gmt.soest.hawaii.edu/projects/gmt/wiki/Installing in order to use this function. Refer to the vignette for more information.")
}
#finds blockmean of data
#-60 for lower bound lat
#+80 for upper bound lat
#I for every 3x3 mean value, as obtained from gmt.sh file
gmt.system("gmt blockmean motif.xyz -R-180/180/-60/80 -I3 > motif.block")
#grids table using surface
gmt.system("gmt surface motif.block -R-180/180/-60/80 -Gmotif.grd -I0.5 -T.7 -Ll0")
###############WINDOWS SPECIFIC COMMAND LINE ARGUMENTS FOR MAP GENERATION###############
if(OS=="Windows"){
#creates basemap with correct motif as title, and output file as the motif but with a '-'
#separating the locus and amino acid combination; this is done because asterisks are
#forbidden in Windows naming conventions
shell("for /f 'usebackq' %C in (`type filename`) do for /f 'usebackq' %A in (`type motif`) do gmt psbasemap -JM6i -R-180/180/-60/80 -B0 -B+t%A -K >> %C.ps")
if(color==TRUE){
#uses white background to fill landmasses if color=T
shell("for /f 'usebackq' %C in (`type filename`) do gmt pscoast -JM6i -R-180/180/-60/80 -A30000 -B0 -G200 -W0.25p -O -K >> %C.ps")
}
#uses a hashed background to fill landmasses if color=F
if(color==FALSE){
shell("for /f 'usebackq' %C in (`type filename`) do gmt pscoast -JM6i -R-180/180/-60/80 -A30000 -B0 -Gp61 -W0.25p -O -K >> %C.ps")
}
#uses readLines to obtain rounded upperbound information; rounds the last digit to the nearest 0 or 5
upper_round<-shortRoundUp(as.numeric(readLines("upperbound")))
upper_decile<- shortRoundUp(as.numeric(readLines("upperbound"))/10)
#the Solberg dataset has some rounding errors that produce an allele frequency greater than 1;
#for those populations, use a set 1.000 and 0.1 for the decile interval
if(max(gmt_converted_data$allele_freq) >=1){
upper_round<-as.numeric("1")
upper_decile<-as.numeric("0.1")
}
#writes max allelic frequency (rounded) and increment needed to form deciles
write(upper_round, "upper_round")
write(upper_decile, "upper_decile")
#color palette generation
#uses seis color palette if color=T
if(color==TRUE){
#makes custom CPT with increments of max_frequency/10 for deciles
#specifically calls on max_cpt for max frequency and decile increments
shell("for /f 'usebackq' %E in (`type upper_decile`) do for /f 'usebackq' %D in (`type upper_round`) do gmt makecpt -Cseis -Iz -T0/%D/%E > decile.cpt")
}
#uses grayscale color palette if color=F
if(color==FALSE){
shell("for /f 'usebackq' %E in (`type upper_decile`) do for /f 'usebackq' %D in (`type upper_round`) do gmt makecpt -Cgray -Iz -T0/%D/%E > decile.cpt")
}
#creates a vector called decile_interval, which gives decile increments based on upper_round and upper_decile information
decile_interval<-seq(0, upper_round, upper_decile)
#writes decile interval without any line breaks
cat(decile_interval, file="deciles")
#use smaller font size if max allele frequency has more digits
if(nchar(strsplit(as.character(decile_interval[2]), ".", fixed=T)[[1]][[2]]) >=5){
#adds color scale to basemap based on cpt provided -- makes font on legend smaller
shell("for /f 'usebackq' %C in (`type filename`) do gmt psscale -D0.1i/1.1i/2i/0.3i -Cdecile.cpt --FONT_ANNOT_PRIMARY=6.5p,Helvetica,black -Np -L -O -K >> %C.ps")
}
if(nchar(strsplit(as.character(decile_interval[2]), ".", fixed=T)[[1]][[2]]) <5){
#adds color scale to basemap based on cpt provided -- makes font on legend smaller
shell("for /f 'usebackq' %C in (`type filename`) do gmt psscale -D0.1i/1.1i/2i/0.3i -Cdecile.cpt --FONT_ANNOT_PRIMARY=10.0p,Helvetica,black -Np -L -O -K >> %C.ps")
}
shell("for /f 'usebackq' %C in (`type filename`) do gmt pscoast -JM6i -R-180/180/-60/80 -A30000 -Gc -O -K >> %C.ps")
#clips/masks map areas with no data table coverage -- radius of influence increased to 900 km
shell("for /f 'usebackq' %C in (`type filename`) do gmt psmask motif.xyz -R-180/180/-60/80 -I3 -JM6i -S850k -O -K >> %C.ps")
#grids .grd file onto map
shell("for /f 'usebackq' %C in (`type filename`) do gmt grdimage motif.grd -Cdecile.cpt -JM6i -R-180/180/-60/80 -O -K >> %C.ps")
#makes a contour map using a .grd file
shell("for /f 'usebackq' %C in (`type filename`) do gmt grdcontour motif.grd -JM6i -Cdecile.cpt -A- -R-180/180/-60/80 -O -K >> %C.ps")
#plots longtitude/latitude coordinates onto basemap
shell("for /f 'usebackq' %C in (`type filename`) do gmt psxy motif.block -R-180/180/-60/80 -JM6i -A -G255 -W0.5p -Sc.05 -O -K >> %C.ps")
#calls psmask again to terminate clip path with -C parameter
shell("for /f 'usebackq' %C in (`type filename`) do gmt psmask motif.xyz -R-180/180/-60/80 -I3 -JM6i -S850k -C -O -K >> %C.ps")
#calls pcoast again to re-establish coastlines and -Q parameter to quit clipping
shell("for /f 'usebackq' %C in (`type filename`) do gmt pscoast -JM6i -R-180/180/-60/80 -A30000 -W0.5 -O -Q >> %C.ps")
#converts ps map to jpg
#requires Ghostscript in order to execute command
shell("for /f 'usebackq' %C in (`type filename`) do gmt psconvert %C.ps -A -Tj -P -Qg4 -E2000")
motif <-gsub("*", "-", motif, fixed=T)
}
###############MAC SPECIFIC COMMAND LINE ARGUMENTS FOR MAP GENERATION###############
else{
#creates basemap with correct motif
gmt.system("gmt psbasemap -JM6i -R-180/180/-60/80 -B0:.`cat motif`: -K > `cat motif`.ps")
#uses white background to fill landmasses if color=T
if(color==TRUE){
#overlays relevant continents onto basemap
gmt.system("gmt pscoast -JM6i -R-180/180/-60/80 -A30000 -B0 -G200 -W0.25p -O -K >> `cat motif`.ps")}
#uses a hashed background to fill landmasses if color=F
if(color==FALSE){
gmt.system("gmt pscoast -JM6i -R-180/180/-60/80 -A30000 -B0 -Gp61 -W0.25p -O -K >> `cat motif`.ps")
}
#gets upperbound for allele frequencies
gmt.system("awk '{print $3}' motif.xyz | sort -r | head -1 > upperbound")
#the Solberg dataset has some rounding errors that produce an allele frequency greater than 1;
#for those populations, use a set 1.000 and 0.1 for the decile interval
if(max(gmt_converted_data$allele_freq) >=1){
cpt_interval<-c(1.000, 0.1)
}
#if the max allele frequency for a motif is under 1, use shortRoundUp, which rounds the last
#digit in a decimal to either a 5 or 10
if(max(gmt_converted_data$allele_freq) <1){
#makes a vector called cpt_interval which contains max allelic information and decile increment needed
#to form deciles
#uses readLines to obtain upperbound information from bash, and rounds it to the nearest 0.5
#uses readLines to obtain decile needed
cpt_interval<-c(shortRoundUp(as.numeric(readLines("upperbound"))), shortRoundUp(as.numeric(readLines("upperbound"))/10))
}
#creates a vector called decile_interval, which gives decile increments based on cpt_interval information
decile_interval<-seq(0, cpt_interval[1], cpt_interval[2])
#writes max allelic frequency (rounded) and increment needed to form deciles
write(cpt_interval, "max_cpt")
#writes decile interval without any line breaks
cat(decile_interval, file="deciles", sep=",")
#uses seis color palette if color=T
if(color==TRUE){
#makes custom CPT with increments of max_frequency/10 for deciles
#specifically calls on max_cpt for max frequency and decile increments
gmt.system("gmt makecpt -Cseis -Iz -T0/`awk '{print $1}' max_cpt`/`awk '{print $2}' max_cpt` > decile.cpt")
}
#uses grayscale color palette if color=F
if(color==FALSE){
gmt.system("gmt makecpt -Cgray -Iz -T0/`awk '{print $1}' max_cpt`/`awk '{print $2}' max_cpt` > decile.cpt")
}
#use smaller font size if max allele frequency has more digits
if(nchar(strsplit(as.character(decile_interval[2]), ".", fixed=T)[[1]][[2]]) >=5){
gmt.system("gmt psscale -D0.1i/1.1i/2i/0.3i -Cdecile.cpt --FONT_ANNOT_PRIMARY=6.5p,Helvetica,black -Np -L -O -K >> `cat motif`.ps")
}
if(nchar(strsplit(as.character(decile_interval[2]), ".", fixed=T)[[1]][[2]]) <5){
#adds color scale to basemap based on cpt provided -- makes font on legend smaller
gmt.system("gmt psscale -D0.1i/1.1i/2i/0.3i -Cdecile.cpt --FONT_ANNOT_PRIMARY=10p,Helvetica,black -Np -L -O -K >> `cat motif`.ps")}
#overlays more coastlines with pscoast
gmt.system("gmt pscoast -JM6i -R-180/180/-60/80 -A30000 -Gc -O -K >> `cat motif`.ps")
#clips/masks map areas with no data table coverage -- radius of influence increased to 900 km
gmt.system("gmt psmask motif.xyz -R-180/180/-60/80 -I3 -JM6i -S850k -O -K >> `cat motif`.ps")
#grids .grd file onto map
gmt.system("gmt grdimage motif.grd -Cdecile.cpt -JM6i -R-180/180/-60/80 -O -K >> `cat motif`.ps")
#makes a contour map using a .grd file
gmt.system("gmt grdcontour motif.grd -JM6i -Cdecile.cpt -A- -R-180/180/-60/80 -O -K >> `cat motif`.ps")
#plots longtitude/latitude coordinates onto basemap
gmt.system("gmt psxy motif.block -R-180/180/-60/80 -JM6i -A -G255 -W0.5p -Sc.05 -O -K >> `cat motif`.ps")
#calls psmask again to terminate clip path with -C parameter
gmt.system("gmt psmask motif.xyz -R-180/180/-60/80 -I3 -JM6i -S850k -C -O -K >> `cat motif`.ps")
#calls pcoast again to re-establish coastlines and -Q parameter to quit clipping
gmt.system("gmt pscoast -JM6i -R-180/180/-60/80 -A30000 -W0.5 -O -Q >> `cat motif`.ps")
#converts ps map to jpg -- saves into local environment
#requires Ghostscript in order to execute command
gmt.system("gmt psconvert `cat motif`.ps -A -Tj -P -Qg4 -E2000")
}
#move final output of motif.jpg directory specified by the direct parameter; default is
#set to the user's working directory
#silence file.move message
suppressMessages(file.move(paste(tempdir(), .Platform$file.sep , motif, ".jpg", sep=""), direct, overwrite = T))
invisible(paste(motif,".jpg written to ",direct,".",sep=""))
}
shortRoundUp <- function(frVal){
valPlus <- (5-as.numeric(substr(as.character(frVal),nchar(as.character(frVal)),nchar(as.character(frVal)))))
return(ceiling(frVal*10^(nchar(as.character(frVal))-2)+ifelse(valPlus<0,valPlus+5,valPlus))/(10^(nchar(as.character(frVal))-2)))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.