R/PALM.R

Defines functions shortRoundUp PALM

Documented in PALM

## Population Allele Locating Mapmaker v2.0.8 16JUNE2025
#'Population Allele Locating Mapmaker
#'
#'Produces a frequency heatmap for a specified allele, amino-acid motif, or haplotype based on the allele frequency data in the Solberg dataset.
#'
#'@param variant An allele or an amino acid motif in the following format: Locus*##$~##$~##$, where ## identifies a peptide position, and $ identifies an amino acid residue. Motifs can include any number of amino acids. Haplotypes must contain alleles that follow the aforementioned format, and must be delimited by "~".
#'@param variantType Specifies whether the variant is an allele, motif, or haplotype.
#'@param filename The full file path of the user specified dataset if the user wishes to use their own file, or the pre-bundled Solberg dataset or mock haplotype dataset. User provided datasets must be a .dat, .txt, or.csv file, and must conform to the structure and format of the datasets bundled with the package. Allele and motif datasets should follow the Solberg dataset format, and haplotype datasets should follow the SSHAARP haplotype mock data format.
#'@param mask A logical parameter that determines if areas with little to no population coverage should be masked. The default value is set to FALSE.
#'@param color A logical parameter that identifies if the heat maps should be made in color (TRUE) or gray scale (FALSE). The default value is TRUE.
#'@param filterMigrant A logical parameter that determines if admixed populations (OTH) and migrant populations (i.e. any complexities with the 'mig') should be excluded from the dataset. The default value is TRUE.
#'@param mapScale A logical parameter that determines if the max frequency value of the map scale should be 1 (FALSE), or if it should represent the maximum frequency of the chosen motif, allele, or haplotype (TRUE). The default value is TRUE.
#'@param direct The directory into which the map produced is written. The default directory is the user's working directory.
#'@param AFND A logical parameter that determines whether the user specified dataset is data from AFND. This parameter is only relevant if haplotype maps are being made. Default is TRUE.
#'@param generateLowFreq A logical parameter that determines whether maps should be generated for a variant if the maximum frequency for the variant is low frequency. Low frequency populations are defined as those with a frequency of 0.000, indicating three zeros after the decimal point.
#'@param resolution An integer for raster resolution in dpi for the final map output. It is not recommended to go below 400. Default is set to 500.
#'
#'@importFrom data.table rbindlist
#'@importFrom gmt gmt.system r2gmt
#'@importFrom DescTools RoundTo
#'@importFrom utils sessionInfo globalVariables
#'@importFrom dplyr filter %>% group_by summarise .data
#'@importFrom stringr str_extract_all
#'@importFrom filesstrings file.move
#'@importFrom HLAtools getField
#'
#'@note IMGT protein alignments will be generated for the locus of the specified variant the first time PALM is executed for a given locus. The alignments will be saved to the temp directory and referenced by PALM. PALM checks if the locus specific alignment is present in the temp directory; if it is not, a protein alignment object will be built for the locus. Restarting the R session will remove existing alignments.
#'@note The produced frequency heatmap is generated by using the Generic Mapping Tools (GMT) R Package, which is an interface between R and the GMT map making software.
#'@note The Solberg dataset is the tab-delimited ‘1-locus-alleles.dat’ text file in the results.zip archive at http://pypop.org/popdata/.
#'@note The Solberg dataset is also prepackaged into SSHAARP as 'solberg_dataset'.
#'@note A mock haplotype dataset modeled after the AFND network's haplotype dataset structure is available for usage under "mock_haplotype_dataset".
#'@note While the map legend identifies the highest frequency value, values in this range may not be represented on the map due to frequency averaging over neighboring populations.
#'
#'@export
#'
#'@return The specified motif and the directory into which the heat map was written are returned in an invisible character vector. Otherwise, a warning message is returned.
#'
#'@examples
#'#Example to produce a motif color map where migrant populations are filtered out, mask is off
#'\dontrun{PALM("DRB1*26F~28E~30Y",
#'variantType="motif",
#'mask = FALSE,
#'filterMigrant=TRUE,
#'filename = SSHAARP::solberg_dataset)}
#'
#'#Example to produce an allele greyscale map where migrant populations are not filtered, mask is on
#'\dontrun{PALM("DRB1*01:01",
#'variantType="allele",
#'mask = TRUE, color=FALSE,
#'filterMigrant=FALSE,
#'filename = SSHAARP::solberg_dataset)}
#'
#'#Example to produce a color allele map with mapScale T and the allele has more than 2 fields
#'\dontrun{PALM("DRB1*01:01:01",
#'variantType="allele",
#'filterMigrant=FALSE,
#'mapScale=TRUE,
#'filename = SSHAARP::solberg_dataset)}
#'
#'#Example to produce a color haplotype map with default parameters with the mock haplotype dataset
#'\dontrun{PALM("DRB1*01:01~A*01:01",
#'variantType = "haplotype",
#'filename = SSHAARP::mock_haplotype_dataset)}
#'
#'@references Solberg et.al. (2008) <doi: 10.1016/j.humimm.2008.05.001>
#'

PALM<-function(variant, variantType, filename, mask=FALSE, color=TRUE, filterMigrant = TRUE, mapScale = TRUE, direct= getwd(), AFND=TRUE, generateLowFreq = TRUE, resolution = 500){

  vTBool<-validateVariantType(variantType)

  if(!vTBool){
    return(warning("The entered value for the 'variantType' parameter is not valid. Please enter a valid value."))
  }

  wd <- getwd()
  on.exit(setwd(wd))

  #checks if the directory user entered is valid
  if(dir.exists(direct)==FALSE){
    return(warning("The directory you specified does not exist. Please enter a valid destination directory for maps."))
  }

  fileCheck<-readFilename(filename, variant)
  if(!is.data.frame(fileCheck)){
    return(fileCheck)
  }

  OS<-strsplit(sessionInfo()$running, " ")[[1]][[1]]

  alignFile<-paste(tempdir(), '/', 'SSHAARP_alignments.rda', sep = '')

  originalAlignment<-list()

  #build alignment if not in temp directory; else load IMGT protein alignments
  #make globally available to all other functions
  if(length(list.files(tempdir(), pattern = alignFile)) != 0){
    load(alignFile)
    originalAlignment<-HLAalignments
  } else{
    HLAalignments<-list()
  }

  if(variantType %in% c('allele', 'motif')){
      align_var<-variant
  } else{
      align_var<-strsplit(variant, '~')[[1]]
  }

  #if locus is not present in the HLAalignments list, create protein alignment
  #and add it to the list object
  for(i in align_var){

    locus<-getVariantInfo(i)[[1]]

    if(!locus %in% names(HLAalignments)){

      alignmentOut<-buildAlignments(locus, source = 'AA')[[1]]

      #exit PALM() if warning message is returned from buildAlignments due to
      #locus not being found in ANHIG alignments
      if(!is.list(alignmentOut)){
        return(invisible(NULL))
      }

      HLAalignments[[locus]]<-alignmentOut$AA
    }
  }

  #if any new loci were added to HLAalignments, save the object into a .rda file again
  if(length(originalAlignment)!=length(HLAalignments)){
    save(HLAalignments, file = alignFile)
    }

  if(variantType %in% c("motif", "allele")){

    dataset <- dataSubset(variant, filename)

    #if output of dataset after dataSubset is not a dataframe, it is an error
    #return dataset, which contains the error message
    if(is.data.frame(dataset)==FALSE){
      return(warning(dataset[[2]]))
    }
  }

  if(variantType == "haplotype"){

    dataset <- dataSubsetHaplo(variant, filename, AFND, HLAalignments)

    #if output of dataset after dataSubsetHaplo is not a list, then there is
    #an error
    if(is.list(dataset)==FALSE){
      return(warning(dataset[[2]]))
    }
  }

  if(variantType %in% c("motif", "allele")){

    #filters out migrant populations if filter_migrant==TRUE
    if(filterMigrant==TRUE){
      #filters out admixed, migrant OTH populations
      dataset<-dataset[dataset$contin!="OTH",]

      #filters out migrant populations in complexity column
      dataset<-dataset[which((grepl("mig", dataset$complex))==FALSE),]}

  }

  if(variantType=="motif"){

    fMresults<- findMotif(variant, filename, HLAalignments)

    if(is.vector(fMresults)==TRUE){
      return(warning(fMresults[[2]]))
    }

    else{
      #makes an empty list named unique_AWM, where the name of each element is after a unique AWM,
      #which is acquired by using the motif_finder function
      unique_AWM<-sapply(unique(fMresults$trimmed_allele), function(x) NULL)
    }

    #if(length(unique_AWM)==0){
    # return(warning(paste(variant, "No alleles possess this motif", sep =" : ")))
    #}

    #finds unique_AWMs in Solberg dataset and extracts the allele frequency and locus_allele column
    for(y in 1:length(unique_AWM)){
      unique_AWM[[y]]<-dataset[,c(2,3,4,5,6,11,14)][dataset$locus_allele %in% names(unique_AWM[y]),]
    }

    #subsets out locus_allele pairs with the motif from the alignment but aren't present in the input ds
    unique_AWM<-unique_AWM[sapply(unique_AWM, nrow)>0]

    if(length(unique_AWM)==0){
      return(warning(paste(variant, "No alleles in the user input dataset possess this motif", sep =": ")))
    }

    #melts dataframes in list into one big dataframe
    unique_AWM<-rbindlist(unique_AWM)

    #reorders dataframe based on popname alphabetical order
    unique_AWM<-unique_AWM[order(unique_AWM$popname),]

    #makes row names NULL so they are back in sequential enumeration
    row.names(unique_AWM)<-NULL

    motif_sum<-unique_AWM %>%
      group_by(.data$popname) %>%
      summarise(sum = sum(as.numeric(.data$allele.freq)))

    PAF<-data.frame(allele_freq = motif_sum$sum,
                    popname = motif_sum$popname, row.names = NULL, stringsAsFactors = F)

    noAllelePops<-NULL

    if(any(unique(dataset$popname) %in% PAF$popname==FALSE)){

      noAllelePops<-data.frame(popname=unique(dataset$popname)[which(!unique(dataset$popname) %in% PAF$popname)],
                               allele_freq=0, stringsAsFactors = F, row.names=NULL)
    }

    PAF<-rbind(PAF, noAllelePops)

    PAF<-PAF[order(PAF$popname),]

    #merges PAF with dataset data to a new variable, tbm_ds
    #which contains summed up allele frequencies, and relevant contin, complex, locus*allele, and coordinate information
    tbm_ds<-merge(PAF, dataset[!duplicated(dataset$popname),], by="popname")[,c("popname","contin", "complex", "latit", "longit", "allele_freq")]

    #specifies certain rows from motif_map_df to go into a new variable, gmt_converted_data
    gmt_converted_data<-tbm_ds[,c(1,4,5,6)]

    #rerranges format
    gmt_converted_data<-gmt_converted_data[,c("longit", "latit", "allele_freq", "popname")]

  }

  endMess<-NULL

  if(variantType == "allele"){

    vAresults<-verifyAlleleDataset(variant, filename, HLAalignments)

    if(grepl(TRUE, vAresults[[1]])==FALSE){
      return(warning(vAresults[[2]]))
    }

    PAF<-data.frame(allele_freq = vAresults[[3]]$allele.freq,
                    popname = vAresults[[3]]$popname, row.names = NULL, stringsAsFactors = F)

    noAllelePops<-NULL

    if(any(unique(dataset$popname) %in% PAF$popname==FALSE)){

      noAllelePops<-data.frame(popname=unique(dataset$popname)[which(!unique(dataset$popname) %in% PAF$popname)],
                               allele_freq=0, stringsAsFactors = F, row.names=NULL)
    }

    PAF<-rbind(PAF, noAllelePops)

    PAF<-PAF[order(PAF$popname),]

    tbm_ds<-merge(PAF, dataset[!duplicated(dataset$popname),], by="popname")[,c("popname","contin", "complex", "latit", "longit", "allele_freq")]

    gmt_converted_data<-tbm_ds[,c("longit", "latit", "allele_freq", "popname")]

    #rename allele.freq to allele_freq to match syntax
    colnames(gmt_converted_data)[[3]]<-"allele_freq"

    #if endMess is present in vAresults, truncate allele to two fields
    if(length(vAresults[[2]])!=0){

      variant<-getField(variant, 2, TRUE)

    }
  }

  if(variantType == "haplotype"){

    #sum frequencies based on pop_id
    freq_sm<-dataset[[1]] %>%
      group_by(.data$pop_id) %>%
      summarise(freq = sum(.data$frequency))

    PAF<-data.frame(allele_freq = freq_sm$freq,
                    popname = freq_sm$pop_id, row.names = NULL, stringsAsFactors = F)

    noAllelePops<-NULL

    if(any(unique(dataset[[2]]$pop_id) %in% PAF$popname==FALSE)){

      noAllelePops<-data.frame(popname=unique(dataset[[2]]$pop_id)[which(!unique(dataset[[2]]$pop_id) %in% PAF$popname)],
                               allele_freq=0, stringsAsFactors = F, row.names=NULL)

    }

    PAF<-rbind(PAF, noAllelePops)

    tbm_ds<-merge(PAF, dataset[[2]][!duplicated(dataset[[2]]$pop_id),], by.x ="popname", by.y="pop_id")[,c("popname", "allele_freq", "Lat", "Long")]

    gmt_converted_data<-tbm_ds[,c("Long", "Lat", "allele_freq", "popname")]

    colnames(gmt_converted_data)[1:2]<-c("longit","latit")

  }

  #if max allele frequency has more than 3 leading 0's after the decimal point, return message that map cannot be made
  if(generateLowFreq == FALSE){
    if((length(str_extract_all(strsplit(as.character(format(max(gmt_converted_data$allele_freq), scientific = FALSE)), ".", fixed = T)[[1]][[2]], "0")[[1]])>=3) & (strsplit(as.character(max(gmt_converted_data$allele_freq)), ".", fixed = T)[[1]][[1]]==0)){
      #obtain max allele information to see if it is large enough to produce an accurate map for
      max_allele_info<-gmt_converted_data %>% filter(gmt_converted_data$allele_freq==format(max(gmt_converted_data$allele_freq)), scientific = FALSE)

      #convert latit and longit back to
      ifelse(grepl("-", max_allele_info$longit)==TRUE, max_allele_info$longit<-paste(max_allele_info$longit, "W", sep=""), max_allele_info$longit<-paste(max_allele_info$longit, "E", sep=""))
      max_allele_info$longit<-gsub("-", "", max_allele_info$longit)

      ifelse(grepl("-", max_allele_info$latit)==TRUE, max_allele_info$latit<-paste(max_allele_info$latit, "S", sep=""), max_allele_info$latit<-paste(max_allele_info$latit, "N", sep=""))
      max_allele_info$latit<-gsub("-", "", max_allele_info$latit)

      return(paste(paste(paste("The variant you entered has a maximum allele frequency of", max_allele_info$allele_freq), "which is too low of a frequency to produce an accurate map for.", sep=", "), "There are/is", nrow(gmt_converted_data), "population/populations at or below this frequency. The most frequent is found at", max_allele_info$longit, max_allele_info$latit,"."))
    }
  }

  upperbound<-as.numeric(max(sort(gmt_converted_data$allele_freq)))

  if(upperbound==0){
    return(warning(paste('The max allele frequency for', variant, 'is 0. A map cannot be made for this variant.')))
  }

  if(mapScale == TRUE){
    if(upperbound < 0.05){
      upper_round<-shortRoundUp(upperbound)
    } else{
      upper_round<-upperbound
    }
    upper_decile<-upper_round/10
    decile_interval<-seq(0, upper_round, upper_decile)
  }

  if(mapScale == FALSE | upperbound >=1){
    decile_interval<-seq(0, 1, 0.1)
    upper_round<-1
    upper_decile<-0.1
  }

  #set wd to tempdir() to store output files
  setwd(tempdir())

  #write allele without any modifications to name as map title
  #to be used as title for generated allele map
  write(variant, "variant")

  #Windows specific arguments on extracting filename
  if(OS=="Windows"){

    #changes asterisk to '-' in filename since *s are forbidden in Windows file names
    #changes colon to unicode colon for file naming purposes
    variant<-gsub(":", ";", gsub("\\*", "-", variant))

    write(variant, "filename")
  }


  #calls function to convert R object gmt_converted_data to GMT formatted data
  #outputs converted data into local environment
  r2gmt(gmt_converted_data, "motif.xyz")

  #checks if user's machine has GMT software installed
  #if not, an error message is returned and the function stops
  gmtOut<-tryCatch({gmt.system("gmt blockmean motif.xyz -R-180/180/-60/80 -I4 > motif.block")}, error=function(cond){gmtOut<<-"Error"})

  if(length(gmtOut)==1){
    return("The GMT software is not installed at the level of your operating system. Please install GMT from http://gmt.soest.hawaii.edu/projects/gmt/wiki/Installing in order to use this function. Refer to the vignette for more information.")
  }

  #finds blockmean of data
  #-60 for lower bound lat
  #+80 for upper bound lat
  #I for every 4x4 mean value, as obtained from gmt.sh file
  gmt.system("gmt blockmean motif.xyz -R-180/180/-60/80 -I4 > motif.block")

  #grids table using surface
  gmt.system("gmt surface motif.block -R-180/180/-60/80 -Gmotif.grd -I0.5 -T.7 -Ll0")


  ###############WINDOWS SPECIFIC COMMAND LINE ARGUMENTS FOR MAP GENERATION###############

  if(OS=="Windows"){

    #creates basemap with correct variant as title, and output file as the variant but with a '-'
    #separating the locus and amino acid combination; this is done because asterisks are
    #forbidden in Windows naming
    invisible(shell("for /f 'usebackq' %C in (`type filename`) do for /f 'usebackq' %A in (`type variant`) do gmt psbasemap -JM6i -R-180/180/-60/80 -B0 -B+t%A -K >> %C.ps", intern=T))

    if(color==TRUE){
      #uses white background to fill landmasses if color=T
      invisible(shell("for /f 'usebackq' %C in (`type filename`) do gmt pscoast -JM6i -R-180/180/-60/80 -A30000 -B0 -G200 -W0.25p -O -K >> %C.ps", intern=T))
    }

    #uses a hashed background to fill landmasses if color=F
    if(color==FALSE){
      invisible(shell("for /f 'usebackq' %C in (`type filename`) do gmt pscoast -JM6i -R-180/180/-60/80 -A30000 -B0 -Gp61 -W0.25p -O -K >> %C.ps", intern=T))
    }

    #writes max allelic frequency (rounded) and increment needed to form deciles
    write(upper_round, "upper_round")
    write(upper_decile, "upper_decile")

    #color palette generation
    #uses seis color palette if color=T
    if(color==TRUE){
      #makes custom CPT with increments of max_frequency/10 for deciles
      #specifically calls on max_cpt for max frequency and decile increments
      invisible(shell("for /f 'usebackq' %E in (`type upper_decile`) do for /f 'usebackq' %D in (`type upper_round`) do gmt makecpt -Cseis -Iz -T0/%D/%E > decile.cpt", intern=T))
    } else{
      invisible(shell("for /f 'usebackq' %E in (`type upper_decile`) do for /f 'usebackq' %D in (`type upper_round`) do gmt makecpt -Cgray -Iz -T0/%D/%E > decile.cpt", intern=T))
    }

    #use smaller font size if max allele frequency has more digits
    if(nchar(strsplit(as.character(decile_interval[2]), ".", fixed=T)[[1]][[2]]) >=5){
      #adds color scale to basemap based on cpt provided -- makes font on legend smaller
      invisible(shell("for /f 'usebackq' %C in (`type filename`) do gmt psscale -Dx0.1i/0.9i/1.5i/0.3i -Cdecile.cpt --FONT_ANNOT_PRIMARY=14p,Helvetica,black -Np -L -O -K >> %C.ps", intern=T))
    }

    if(nchar(strsplit(as.character(decile_interval[2]), ".", fixed=T)[[1]][[2]]) <5){
      #adds color scale to basemap based on cpt provided -- makes font on legend smaller
      invisible(shell("for /f 'usebackq' %C in (`type filename`) do gmt psscale -Dx0.1i/0.9i/1.5i/0.3i -Cdecile.cpt --FONT_ANNOT_PRIMARY=16p,Helvetica,black -Np -L -O -K >> %C.ps", intern=T))
    }

    invisible(shell("for /f 'usebackq' %C in (`type filename`) do gmt pscoast -JM6i -R-180/180/-60/80 -A30000 -Gc -O -K >> %C.ps", intern=T))

    if(mask==TRUE){
      #clips/masks map areas with no data table coverage -- radius of influence increased to 900 km
      invisible(shell("for /f 'usebackq' %C in (`type filename`) do gmt psmask motif.xyz -R-180/180/-60/80 -I3 -JM6i -S850k -O -K >> %C.ps", intern=T))
    }

    #grids .grd file onto map
    invisible(shell("for /f 'usebackq' %C in (`type filename`) do gmt grdimage motif.grd -Cdecile.cpt  -JM6i -R-180/180/-60/80 -O -K >> %C.ps", intern=T))


    #makes a contour map using a .grd file
    invisible(shell("for /f 'usebackq' %C in (`type filename`) do gmt grdcontour motif.grd -JM6i -Cdecile.cpt -A- -R-180/180/-60/80 -O -K >> %C.ps", intern=T))

    #plots longtitude/latitude coordinates onto basemap
    invisible(shell("for /f 'usebackq' %C in (`type filename`) do gmt psxy motif.block -R-180/180/-60/80 -JM6i -A -G255 -W0.5p -Sc.05 -O -K >> %C.ps", intern=T))

    if(mask == TRUE){
      #calls psmask again to terminate clip path with -C parameter
      invisible(shell("for /f 'usebackq' %C in (`type filename`) do gmt psmask motif.xyz -R-180/180/-60/80 -I3 -JM6i -S850k -C -O -K >> %C.ps", intern=T))
    }

    #calls pcoast again to re-establish coastlines and -Q parameter to quit clipping
    invisible(shell("for /f 'usebackq' %C in (`type filename`) do gmt pscoast -JM6i -R-180/180/-60/80 -A30000 -W0.5 -O -Q  >>  %C.ps", intern=T))

    #converts ps map to jpg
    #requires Ghostscript in order to execute command
    raster<-paste("for /f 'usebackq' %C in (`type filename`) do gmt psconvert %C.ps -Tj -P -E", resolution, sep = '')

    invisible(shell(raster, intern=T))

  } else{
    ###############MAC SPECIFIC COMMAND LINE ARGUMENTS FOR MAP GENERATION###############

    #creates basemap with correct motif
    gmt.system("gmt psbasemap -JM6i -R-180/180/-60/80 -B+t`cat variant` -K > `cat variant`.ps")

    #uses white background to fill landmasses if color=T
    if(color==TRUE){
      #overlays relevant continents onto basemap
      gmt.system("gmt pscoast -JM6i -R-180/180/-60/80 -A30000 -B0 -G200 -W0.25p -O -K >> `cat variant`.ps")
    }

    #uses a hashed background to fill landmasses if color=F
    if(color==FALSE){
      gmt.system("gmt pscoast -JM6i -R-180/180/-60/80 -A30000 -B0 -Gp61 -W0.25p -O -K >> `cat variant`.ps")
    }

    #gets upperbound for allele frequencies
    #gmt.system("awk '{print $3}' motif.xyz | sort -r -g | head -1 > upperbound")

    #if(upperbound < 0.05){
    #  upper_round<-shortRoundUp(upperbound)
    #} else{
    #  upper_round<-upperbound
    #}

    #upper_decile<-upper_round/10

    #if(mapScale == TRUE){

    #the Solberg dataset has some rounding errors that produce an allele frequency greater than 1;
    #for those populations, use a set 1.000 and 0.1 for the decile interval
    #if(max(gmt_converted_data$allele_freq) >=1){
    #  cpt_interval<-c(1.000, 0.1)
    #}

    #if the max allele frequency for a motif is under 1, use shortRoundUp, which rounds the last
    #digit in a decimal to either a 5 or 10 if the frequency is less than 0.05
    #if(max(gmt_converted_data$allele_freq) <1){
    #makes a vector called cpt_interval which contains max allelic information and decile increment needed
    #to form deciles
    #uses readLines to obtain upperbound information from bash, and rounds it to the nearest 0.5 if it is less than 0.05
    #uses readLines to obtain decile needed
    # cpt_interval<-c(upper_round, upper_decile)
    #}
    #}

    #if(mapScale == FALSE){
    #  cpt_interval<-c(1.000, 0.1)
    #}

    #creates a vector called decile_interval, which gives decile increments based on cpt_interval information
    #decile_interval<-seq(0, cpt_interval[1], cpt_interval[2])

    #writes max allelic frequency (rounded) and increment needed to form deciles
    write(c(upper_round, upper_decile), "max_cpt")

    #uses seis color palette if color=T
    if(color==TRUE){
      #makes custom CPT with increments of max_frequency/10 for deciles
      #specifically calls on max_cpt for max frequency and decile increments
      gmt.system("gmt makecpt -Cseis -Iz -T0/`awk '{print $1}' max_cpt`/`awk '{print $2}' max_cpt` > decile.cpt")
    } else{
      gmt.system("gmt makecpt -Cgray -Iz -T0/`awk '{print $1}' max_cpt`/`awk '{print $2}' max_cpt` > decile.cpt")
    }

    #use smaller font size if max allele frequency has more digits
    if(nchar(strsplit(as.character(format(decile_interval[2], scientific = FALSE)), ".", fixed=T)[[1]][[2]]) >=5){
      gmt.system("gmt psscale -Dx0.1i/0.9i/1.5i/0.3i -Cdecile.cpt --FONT_ANNOT_PRIMARY=14p,Helvetica,black -Np -L -O -K >> `cat variant`.ps")
    } else{
      #adds color scale to basemap based on cpt provided -- makes font on legend smaller
      gmt.system("gmt psscale -Dx0.1i/0.9i/1.5i/0.3i -Cdecile.cpt --FONT_ANNOT_PRIMARY=16p,Helvetica,black -Np -L -O -K >> `cat variant`.ps")
    }

    #overlays more coastlines with pscoast
    gmt.system("gmt pscoast -JM6i -R-180/180/-60/80 -A30000 -Gc -O -K >> `cat variant`.ps")

    if(mask==TRUE){
      #clips/masks map areas with no data table coverage -- radius of influence increased to 900 km
      gmt.system("gmt psmask motif.xyz -R-180/180/-70/80 -I3 -JM6i -S850k -O -K >> `cat variant`.ps")
    }

    #grids .grd file onto map
    gmt.system("gmt grdimage motif.grd -Cdecile.cpt  -JM6i -R-180/180/-60/80 -O -K >> `cat variant`.ps")

    #makes a contour map using a .grd file
    gmt.system("gmt grdcontour motif.grd -JM6i -Cdecile.cpt -A- -R-180/180/-60/80 -O -K >> `cat variant`.ps")

    #plots longtitude/latitude coordinates onto basemap
    gmt.system("gmt psxy motif.block -R-180/180/-60/80 -JM6i -A -G255 -W0.5p -Sc.05 -O -K >> `cat variant`.ps")

    if(mask==TRUE){
      #calls psmask again to terminate clip path with -C parameter
      gmt.system("gmt psmask motif.xyz -R-180/180/-70/80 -I3 -JM6i -S850k -C -O -K >> `cat variant`.ps")
    }

    #calls pcoast again to re-establish coastlines and -Q parameter to quit clipping
    gmt.system("gmt pscoast -JM6i -R-180/180/-60/80 -A30000 -W0.5 -O -Q  >>  `cat variant`.ps")

    #converts ps map to jpg -- saves into local environment
    #requires Ghostscript in order to execute command
    gmt.system(paste("gmt psconvert `cat variant`.ps -A -Tj -P -Qg4 -Z -E", resolution, sep = ''))
  }

  #move final output of motif.jpg directory specified by the direct parameter; default is
  #set to the user's working directory
  #silence file.move message
  suppressMessages(file.move(paste(tempdir(), .Platform$file.sep , variant, ".jpg", sep=""), direct, overwrite = T))

  #for WINDOWS: if allele map is being generated, replace ";" in allele.jpg with a unicode colon
  #rename previous file output with ";" as the delimiter
  #specify directory from direct for file location
  if(OS == "Windows"){
    if(variantType %in% c("allele", "haplotype")){
      file.rename(from = paste(direct, "/" ,paste(variant, ".jpg", sep=""), sep=""), to = paste(direct, "/", paste(gsub(";", "\u02f8", variant), ".jpg", sep=""), sep =""))
    }
  }

  if(variantType == "motif"){
    return(invisible(paste(variant,".jpg written to ",direct,".",sep="")))
  }

  if(variantType == "allele"){

    if(length(vAresults[[2]])!=0){
      return(invisible(append(vAresults[[2]], paste(gsub(";", ":", variant),".jpg written to ",direct,".",sep=""))))
    }

    else{
      return(invisible(paste(gsub(";", ":", variant),".jpg written to ",direct,".",sep="")))
    }
  }

  if(variantType == "haplotype"){
    return(invisible(paste(gsub(";", ":", variant),".jpg written to ",direct,".",sep="")))
  }
}

shortRoundUp <- function(frVal){
  valPlus <- (5-as.numeric(substr(as.character(frVal),nchar(as.character(frVal)),nchar(as.character(frVal)))))
  return(ceiling(frVal*10^(nchar(as.character(frVal))-2)+ifelse(valPlus<0,valPlus+5,valPlus))/(10^(nchar(as.character(frVal))-2)))
}

Try the SSHAARP package in your browser

Any scripts or data that you put into this service are public.

SSHAARP documentation built on June 18, 2025, 1:08 a.m.