#' doFlyMineAnalysis
#'
#' This function determines the lethality status in D melanogaster of a given human gene
#'
#' Note: queries the WormMine API using Ruby see http://im-dev.wormbase.org/tools/wormmine/api.do?subtab=ruby
#'
#' @param con A \code{SQLiteConnection} object
#' @param genes A vector of human ENSEMBL gene ids
#' @param alt_mart A Mart object created using biomaRt::useMart. This should specify \code{hsapiens_gene_ensembl}
#' as the dataset. See \code{\link{getOrthologs}} for more details.
#' @return a data frame with the results of the analysis
#' @export
doFlyMineAnalysis <- function(con, genes, alt_mart=NULL) {
#get the fly orthologs
fly_orthologs <- getOrthologs(con, genes, 'dmelanogaster', alt_mart)
#group the fly orthologs for those genes with orthologs
fly_orthologs_grouped <- fly_orthologs %>%
dplyr::filter(nchar(dmelanogaster_homolog_orthology_type) > 4) %>%
dplyr::group_by(ensembl_gene_id, dmelanogaster_homolog_orthology_type) %>%
dplyr::summarise(dmelanogaster_count_orthologs=n(),
dmelanogaster_ortholog_ids=paste(dmelanogaster_homolog_ensembl_gene, collapse='|')) %>%
dplyr::ungroup()
#sort out entries with no ortholog
no_fly_orthologs <- fly_orthologs %>%
dplyr::filter(nchar(dmelanogaster_homolog_orthology_type) < 4) %>%
dplyr::transmute(ensembl_gene_id,
dmelanogaster_homolog_orthology_type='no_fly_ortholog',
dmelanogaster_count_orthologs=0,
dmelanogaster_ortholog_ids='')
#combine and mark up the human genes with a single fly ortholog
fly_orthologs_combined <- dplyr::bind_rows(fly_orthologs_grouped, no_fly_orthologs) %>%
dplyr::mutate(single_ortholog_filter=(dmelanogaster_count_orthologs==1)) %>%
as.data.frame()
#now format gene text for ruby script
fly_gene_ids <- fly_orthologs_combined %>%
dplyr::filter(single_ortholog_filter) %>%
dplyr::transmute(id=dmelanogaster_ortholog_ids) %>%
dplyr::distinct()
# list of gene-chunks, each of chunk 500 genes
gene_chunks <- split(fly_gene_ids$id, ceiling(seq_along(fly_gene_ids$id)/500))
message(paste("splitting data into ",length(gene_chunks),"chunks of up to 500 genes"))
#internal function to get the data from Intermine
queryMine <- function(query_genes) {
#run the ruby script
message(sprintf("Get data for %s fly genes from FlyMine", length(query_genes)))
fly_genes_txt <- paste(query_genes, collapse=',')
flymine_cmd <- sprintf('ruby \'%s/ruby/doFlyMineAnalysis.rb\' "%s"',system.file(package='CollateralVulnerability2016'), fly_genes_txt)
flymine_data_txt <- system(flymine_cmd, intern=TRUE)
#import the data into a data frame
readr::read_tsv(paste(flymine_data_txt, collapse='\n'),
skip=1,
col_types='ccccccc',
col_names=c("primaryIdentifier", "symbol", "alleles_alleleClass", "alleles_primaryIdentifier",
"alleles_symbol", "alleles_phenotypeAnnotations_annotationType",
"alleles_phenotypeAnnotations_description"))
}
#get the data from intermine
flymine_data <- lapply(gene_chunks, queryMine) %>% dplyr::bind_rows() %>% as.data.frame()
message(sprintf('%s records retrieved, now analysing', nrow(flymine_data)))
#filter the flymine data
flymine_data_filtered <- flymine_data %>%
dplyr::filter(alleles_phenotypeAnnotations_annotationType == 'phenotype class') %>%
dplyr::mutate(is_lethal_fly = grepl('lethal',alleles_phenotypeAnnotations_description)) %>%
as.data.frame()
##generate a summary table with one row per allele, highlgiths whether any phenotype annotations are lethal
flymine_data_byallele <- flymine_data_filtered %>%
dplyr::group_by(primaryIdentifier, symbol, alleles_alleleClass, alleles_primaryIdentifier, alleles_symbol) %>%
dplyr::summarise(is_lethal_fly=as.logical(max(is_lethal_fly))) %>%
dplyr::ungroup() %>%
as.data.frame()
#now count the number of lethal vs non-lethal alleles
flymine_data_bygene <- flymine_data_byallele %>%
dplyr::group_by(primaryIdentifier, symbol) %>%
dplyr::summarise(allele_count_fly=n(),
lethal_count_fly=sum(is_lethal_fly),
lethal_pct_fly=round(lethal_count_fly*100/allele_count_fly,1)) %>%
dplyr::ungroup() %>%
as.data.frame()
#combine output
output_df <- fly_orthologs_combined %>%
dplyr::left_join(flymine_data_bygene, by=c('dmelanogaster_ortholog_ids'='primaryIdentifier')) %>%
as.data.frame()
#put data into sqlite database
DBI::dbWriteTable(con, "flymine_analysis_martdata_grouped", fly_orthologs_combined, overwrite=TRUE)
DBI::dbWriteTable(con, "flymine_analysis_flymine_filtered", flymine_data_filtered, overwrite=TRUE)
DBI::dbWriteTable(con, "flymine_analysis_flymine_byallele", flymine_data_byallele, overwrite=TRUE)
DBI::dbWriteTable(con, "flymine_analysis_flymine_bygene", flymine_data_bygene, overwrite=TRUE)
DBI::dbWriteTable(con, "flymine_analysis_output", output_df, overwrite=TRUE)
message('Finished')
return(output_df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.