amlresistancenetworks: Calculates networks that lead to AML resistance to drugs

Documented in computeFoldChangePvals computePhosphoChanges getCoClusteredProteins getCorrelatedProteins limmaTwoFactorDEAnalysis manualDEAnalysis mapPhosphoToKinase plotKinDat plotSingleProtein

##proteomic stats





#'
#'plot single protein
#'@import dplyr
#'@import ggplot2
#'@export
#'@param gene
#'@param df tidied data fraome
#'@param prefix
plotSingleProtein<-function(gene,df,prefix=''){
    subset(df,Gene==gene)%>%
    ggplot2::ggplot(aes(x=treatment,y=value))+
      ggplot2::geom_jitter(aes(col=ligand),position=position_dodge(0.8)) +
      ggplot2::geom_boxplot(aes(fill=ligand,alpha=0.5),outlier.shape=NA,position=position_dodge(0.8))+
      ggplot2::facet_grid(~cellLine)+
      ggplot2::ggtitle(paste(gene,'Expression',prefix))
    ggsave(paste0(prefix,gene,'Expression.png'))
  
}



#'
#'limmaTwoFactorDEAnalysis
#'uses Osama's code to compute de from limma
#'@author Osama
#'@import BiocManager
#'@export
#'@param data matrix
#'@param group1 ids
#'@param group2 ids
limmaTwoFactorDEAnalysis <- function(dat, sampleIDs.group1, sampleIDs.group2) {
  # Conduct DE expression analysis using limma from the expression matrix dat (group2 vs group1, group1 is reference)
  #
  # Args:
  #   dat: Expression data matrix, rows are genes, columns are samples
  #   sampleIDs.group1: Vector with ids of samples in reference group (eg. normal samples)
  #   sampleIDs.group2: Vector with ids of samples in interest group (eg. tumor samples) 
  #
  # Returns:
  #   limma Differential Expression results.
  #
  #http://www.biostat.jhsph.edu/~kkammers/software/CVproteomics/R_guide.html
  #http://genomicsclass.github.io/book/pages/using_limma.html
  #https://wiki.bits.vib.be/index.php/Tutorial:_Testing_for_differential_expression_I
  if(!require('limma')){
    BiocManager::install('limma')
    
    library(limma)
}
  fac <- factor(rep(c(2,1), c(length(sampleIDs.group2), length(sampleIDs.group1))))
  design <- model.matrix(~fac)
  fit <- lmFit(dat[,c(sampleIDs.group2, sampleIDs.group1)], design)
  fit <- eBayes(fit)
 # print(topTable(fit, coef=2))
  res <- topTable(fit, coef=2, number=Inf, sort.by="none")
  res <- data.frame(featureID=rownames(res), res, stringsAsFactors = F)
  return(arrange(res,P.Value))
}

#' compute manual lfc
#' @export
#' @param data matrix
#' @param group1 ids
#' @param group2 ids
#' @return data frame with fold change, p-value
manualDEAnalysis<-function(data,group1,group2){
  
  if(length(group1)==1)
    control=data[,group1]
  else
    control=rowMeans(data[,group1])
  if(length(group2)==1)
    condition=data[,group2]
  else
    condition=rowMeans(data[,group2])
  res = data.frame(featureID=rownames(data),logFC=condition - control)
  res$absFc = abs(res$logFC)
  res<-subset(res,!is.na(logFC))
  res$P.Value = 1.0-(rank(res$absFc)/nrow(res))
  res$adj.P.Val = p.adjust(res$P.Value)
  return(res)
}


#' Reads in data frame and computes fold change
#' 
#' @export
#' @param data.frame 
#' @param control
#' @param doManual binary set to calculate fc manual
#' @param conditions
#' @return tidied data frame with p-values and fold change
#' @import dplyr
#' @import stringr
#' @import tidyr
#' @import purrr
#' 
computeFoldChangePvals<-function(g.data,
                                 control=c('None'),
                                 conditions=c("FLT3","FGF2"),
                                 doManual=FALSE){
  
#  print(unique(g.data$cellLine))
  
  data<-g.data%>%
    dplyr::select(cellLine,Gene,value,treatment,Sample)%>%
    subset(!is.na(value))%>%
    subset(!is.na(Gene))
  
  #remove those data that have only 1 replicate  
  if(doManual)
    reps=data
  else
    reps<-data%>%
      group_by(cellLine,Gene,treatment)%>%
      dplyr::mutate(reps=length(value))%>%
      subset(reps>1)%>%
      ungroup()        
  
  if(doManual)
    myfun=manualDEAnalysis
  else
    myfun=limmaTwoFactorDEAnalysis
  
  lig.datasets<-purrr::map_df(conditions,function(lig){
#    print(lig)
    g1<-subset(reps,treatment%in%control)%>%dplyr::select(Sample)%>%distinct()
    g2<-subset(reps,treatment==lig)%>%dplyr::select(Sample)%>%distinct()
    matp=reps%>%
      subset(treatment%in%c(control,lig))%>% #get only those valuese that are 'nOne' or the name of the ligand
      dplyr::select(Gene,Sample,value)%>%distinct()%>%
      tidyr::pivot_wider(values_from=value,names_from=Sample,values_fn=list(value=mean))%>%
      tibble::column_to_rownames("Gene")
     myfun(matp,unlist(g1),unlist(g2))%>%
      dplyr::mutate(Condition=lig,Control=paste(control,collapse=','))%>%
      dplyr::select(Gene=featureID,Condition,Control,condition_to_control=logFC,p_val=P.Value,p_adj=adj.P.Val)
    })
 
  return(lig.datasets) 
}

##drug sensitivity statistics

#' Get correlated proteins
#' Computes protein correlations with dose response metric, either AUC or IC50
#' returns list of drugs, proteins and correlation values
#' @param data
#' @param metric
#' @return data.frame
#' @import ggplot2
#' @import dplyr
#' @export
getCorrelatedProteins<-function(data,metric="AUC"){
  cor.stats<-subset(data,Metric==metric)%>%
    group_by(Condition,Gene)%>%
    dplyr::select(`AML sample`,LogFoldChange,Value)%>%distinct()%>%
    mutate(correlation=cor(LogFoldChange,Value))%>%
    dplyr::select(Condition,correlation)%>%
    distinct()
  ggplot2::ggplot(cor.stats)+
    ggplot2::geom_histogram(mapping=ggplot2::aes(x=correlation,fill=Condition),
                            position='dodge')+
    ggplot2::ggtitle(paste('Correlation between protein change and drug',metric))
  ggplot2::ggsave(paste0('drugProtein',metric,'correlation.png'))
  
  univ=unique(cor.stats$Gene)
  sapply(unique(cor.stats$Condition),function(cond){
    subset(cor.stats,Condition==cond)%>%
      dplyr::select(Gene,value='correlation')%>%
      amlresistancenetworks::computeGSEA(prot.univ = univ,prefix=paste(cond,'correlation with',metric))
  })
  
  cor.stats
  
}


#' getCoClusteredProteins
#' Designed to evaluate time-course data
#' Gets groups of proteins that are clustered across time points in a condition of interest
#' @param timeCourseData
#' @return dataFrame
#' @import dplyr
#' 
#' @export 
getCoClusteredProteins<-function(prot.data){
  
  #library(amap)
  # print(cellLine,treatment)
  cellLine=unique(prot.data$cellLine)
  treatment=unique(prot.data$treatment)
  # print(head(prot.data))
  mat<-prot.data%>%
    dplyr::select(-c(cellLine,treatment))%>%
    mutate(hours=if_else(timePoint=='16 hr',16,if_else(timePoint=='3 hr',3,.5)))%>%
    mutate(val=as.numeric(as.character(LogFoldChange)))%>%
    dplyr::select(-c(LogFoldChange,timePoint))%>%
    tidyr::pivot_wider(values_from=val,names_from='hours',values_fn=list(val=mean))
  
  mv<-which(apply(mat,1,function(x) any(is.na(x))))
  if(length(mv)>0)
    mat<-mat[-mv,]
  # wss <-purrr::map_dbl(3:20, ~{Kmeans(dplyr::select(mat,-Gene),.,nstart=5,iter.max=30,method='pearson')$tot.withinss})
  # ggplot(data.frame(k=3:20,wss=wss))+geom_point(aes(x=k,y=wss))
  # ggsave(paste0(treatment,'_treated_',cellLine,'_cellsClusters.png'))
  #let's choose k =10
  clusters = Kmeans(dplyr::select(mat,-Gene),centers=10,nstart=20,iter.max=30,method='pearson')
  centers <- tibble::rownames_to_column(as.data.frame(clusters$centers), "cluster")%>%
    tidyr::pivot_longer(2:4,names_to='timepoint',values_to='logRatio')
  
  # ggplot(centers)+geom_path(aes(x=as.numeric(timepoint),y=logRatio,col=cluster,group=cluster))
  mat$cluster = clusters$cluster 
  
  res<-mat%>%tidyr::pivot_longer(2:4,values_to='logRatio',names_to='time')
  
  ggplot(res,aes(x=as.numeric(time),y=logRatio,col=as.factor(cluster),group=Gene))+
    geom_path()+facet_grid(.~cluster)+
    ggtitle(paste(treatment,'treated',cellLine,'cells'))
  ggsave(paste0(treatment,'_treated_',cellLine,'cells.png'))
  
  return(res)
}


#'get ordered phospho sites
#'@param phosData
#'@return data table of log fold change
#'@import dplyr
computePhosphoChanges<-function(phosData){
  dplyr::select(phosData,Gene,site,Peptide)%>%distinct()%>%
    dplyr::mutate(residue=stringr::str_replace(site,paste0(Gene,'-'),''))%>%
    dplyr::mutate(residue=stringr::str_replace_all(residue,"([STY])", ";\\1"))%>%
    dplyr::mutate(residue=stringr::str_replace(residue,"^;", ""))%>%
    dplyr::mutate(residue=stringr::str_replace_all(residue,"([sty])", ""))
  
}

#'mapPhosphoToKInase
#'reads in phospho data, looads of KSDB and then computes log fold change for each kinase
#'@param pat.phos
#'@return data table of merged data
#'@import dplyr
#'@export
mapPhosphoToKinase<-function(pat.phos){

  KSDB <- read.csv(system.file('PSP&NetworKIN_Kinase_Substrate_Dataset_July2016.csv',
                               package='amlresistancenetworks'),stringsAsFactors = FALSE)

  clinvars<-setdiff(names(pat.phos),c('Protein','Gene','site','Peptide','LogRatio'))
  pat.phos$Gene<-unlist(pat.phos$Gene)
  gene.phos<-computePhosphoChanges(pat.phos)
  phos.with.subs<-gene.phos%>%
    inner_join(rename(KSDB,Gene='SUB_GENE',residue='SUB_MOD_RSD'),by=c('Gene','residue'))%>%
    inner_join(pat.phos,by=c('site','Gene'))

  pat.kin.scores<-phos.with.subs%>%
    dplyr::select('Sample','site',Gene,LogFoldChange,GENE,networkin_score)%>%distinct()%>%
    group_by(Sample,GENE)%>%
    summarize(meanLFC=mean(LogFoldChange),meanNKINscore=mean(networkin_score),numSubstr=n_distinct(Gene))%>%
    rename(Kinase='GENE')
  return(pat.kin.scores)
}

#'plotKinDat
#'@param kindat - output of `mapPhospoToKinase`
#'@param phosData - tidied phospho data
#'@param prefix - string for file
#'@param idcol- name of sample identifier
#'@param vars - sample-specific 
#'@param genelist - select genes to plot
#'@export
#'@import pheatmap
plotKinDat<-function(kindat,phosData=phosData,prefix='all',
                     idcol='Sample',vars=c('Sample','cellLine','Ligand'),
                     genelist=c()){
  library(pheatmap)
  ##create matrix of kinase scores
  mat <-kindat%>%
    ungroup()%>%
    tidyr::pivot_wider(-c(meanNKINscore,numSubstr),
                                                values_from=meanLFC,
                                                names_from=Sample,
                                                values_fn=list(meanLFC=mean))%>%
    tibble::column_to_rownames('Kinase')
  kinAts<-kindat%>%
    ungroup()%>%
    dplyr::select(Kinase,numSubstr)%>%
    distinct()%>%
    group_by(Kinase)%>%
    summarize(substrates=mean(numSubstr))%>%
    tibble::remove_rownames()%>%
  tibble::column_to_rownames('Kinase')
  
  sampAts<-phosData%>%
    dplyr::select(vars)%>%
    distinct()%>%
    tibble::remove_rownames()%>%
    tibble::column_to_rownames(idcol)
  #sampAts$TimePoint=as.factor(sampAts$TimePoint)
  vars=names(sort(apply(mat,1,var),decreasing=T)[1:200])
 pheatmap(mat[vars,],cellwidth = 8,cellheight=8,clustering_distance_cols = 'correlation',
          clustering_distance_rows = 'correlation',
          annotation_row = kinAts,annotation_col=sampAts,
          filename=paste0(prefix,'KinaseHeatmap.pdf'),height=25,width=8) 
}

PNNL-CompBio/amlresistancenetworks documentation built on Feb. 8, 2025, 11:27 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

PNNL-CompBio/amlresistancenetworks
Calculates networks that lead to AML resistance to drugs

R/proteinValues.R
In PNNL-CompBio/amlresistancenetworks: Calculates networks that lead to AML resistance to drugs

Defines functions plotKinDat mapPhosphoToKinase computePhosphoChanges getCoClusteredProteins getCorrelatedProteins computeFoldChangePvals manualDEAnalysis limmaTwoFactorDEAnalysis plotSingleProtein

Documented in computeFoldChangePvals computePhosphoChanges getCoClusteredProteins getCorrelatedProteins limmaTwoFactorDEAnalysis manualDEAnalysis mapPhosphoToKinase plotKinDat plotSingleProtein

R Package Documentation

Browse R Packages

We want your feedback!

PNNL-CompBio/amlresistancenetworks Calculates networks that lead to AML resistance to drugs

R/proteinValues.R In PNNL-CompBio/amlresistancenetworks: Calculates networks that lead to AML resistance to drugs

Defines functions plotKinDat mapPhosphoToKinase computePhosphoChanges getCoClusteredProteins getCorrelatedProteins computeFoldChangePvals manualDEAnalysis limmaTwoFactorDEAnalysis plotSingleProtein

Documented in computeFoldChangePvals computePhosphoChanges getCoClusteredProteins getCorrelatedProteins limmaTwoFactorDEAnalysis manualDEAnalysis mapPhosphoToKinase plotKinDat plotSingleProtein

R Package Documentation

Browse R Packages

We want your feedback!

PNNL-CompBio/amlresistancenetworks
Calculates networks that lead to AML resistance to drugs

R/proteinValues.R
In PNNL-CompBio/amlresistancenetworks: Calculates networks that lead to AML resistance to drugs