knitr::opts_chunk$set(echo = TRUE)
Loading all the relevant data and creating the data object with the mutation and viability scores for each type of cancer.
library(reshape2) library(dplyr) library(viridis) library(ggplot2) library(nem) library(biclust) library(UpSetR) # causal packages library(tableone) library(ipw) # library(sandwich) #for robust variance estimation # library(survey) library(Matching) library(slidr) # Set path to all data base_folder = "~/GitHub/CrispR/Data/" # Loading mutations for all cell lines all_cancers_mut_df <- read.delim(paste0(base_folder, "MutationFiles/MutationsCCLE/CCLE_maf_allcancers.txt"), stringsAsFactors = FALSE, header = TRUE) # Filtering out silent mutations all_cancers_mut_df <- all_cancers_mut_df %>% dplyr::filter(Variant_Classification != "Silent" ) # Load annotation file and meta data cellline_annot <- read.csv(paste0(base_folder, "ProjectDRIVE/TableS2.csv"), stringsAsFactors = F) meta_data <- read.csv(paste0(base_folder, "MutationFiles/File_metadata.csv"), header = TRUE, stringsAsFactors = FALSE) Primary_sites <- meta_data$Primary_site # Load log2 copy number data CN_df <- read.delim(paste0(base_folder,"/CopyNumber/CCLE_copynumber_byGene.txt"), stringsAsFactors = F, header = T) # Load Gistic copy number data CN_df_gistic <- read.delim(paste0(base_folder,"/CopyNumber/CCLE_CNA_Integers.txt"), stringsAsFactors = F, header=T) CN_df_gistic <- CN_df_gistic[-which(duplicated(CN_df_gistic$Hugo_Symbol)),] # Getting essential genes data_rsa <- readRDS(paste0(base_folder, "ProjectDRIVE/DRIVE_RSA_data.RDS")) # Remove the essential data from ATARiS normalised data data_ataris <- readRDS(paste0(base_folder, "ProjectDRIVE/DRIVE_ATARiS_data.RDS")) # Getting a list of cell lines for each cancer type cellline_list <- lapply(Primary_sites, function(x){slidr::getCelllines(x, cellline_annot, meta_data)}) names(cellline_list) <- Primary_sites # Getting a list of essential genes for each cancer type essen_gene_list <- lapply(names(cellline_list), function(x){slidr::getEssentialGenes(x = x, celllines = cellline_list[[x]], data = data_rsa)}) names(essen_gene_list) <- Primary_sites all_data = list() fdr = 0.05 min_Nmut = 2 # Generate objects for each primary site all_data <- lapply(Primary_sites, function(x){ slidr::prepareDataObjects(data = data_ataris, x = x, fdr = fdr, min_Nmut = min_Nmut, all_cancers_mut_df = all_cancers_mut_df, CN_df = CN_df, #CN_df_gistic gistic = FALSE, #TRUE celllines = cellline_list[[x]], meta_data = meta_data, essential_genes = essen_gene_list[[x]])}) names(all_data) <- Primary_sites
Once the data is processed in the correct format, use the identifySLHits
function to get the mutation-specific SL partner for each cancer type.
path_results <- "~/Downloads/Slidr_Results_new/" hits <- lapply(all_data, function(x){slidr::identifySLHits(canc_data = x, path_results = path_results, WT_pval_thresh = 0.1)}) names(hits) <- names(all_data) save.image(paste0(path_results, "ProcessedData.Rdata"))
Creating the data object for the pan-cancer analysis.
# Choosing cell lines with both CN and viability data and the driver genes pc_celllines <- intersect(colnames(data_ataris),tolower(colnames(CN_df))) # driver_genes <- read.delim(paste0(base_folder, # "MutationFiles/Cell2018_mutations.txt"), # stringsAsFactors = FALSE, header = FALSE)[[1]] driver_genes <- unique(all_cancers_mut_df$Hugo_Symbol) # Getting the essential genes pc_essen_gene <- slidr::getEssentialGenes(x = NULL, data = data_rsa, celllines = pc_celllines) # Removing the essential genes pc_data <- list(viabilities = NULL, mutations = NULL, CNalterations = NULL, mutation_annot = NULL, primary_site = "pan_cancer") data_ataris <- as.data.frame(t(apply(data_ataris, 1, function(x){ x[which(is.na(x))] <- mean(x, na.rm = TRUE) x}))) pc_data$viabilities <- data_ataris[!rownames(data_ataris) %in% pc_essen_gene,pc_celllines] # Threshold for removing drivers with fewer than 8% mutated samples mut_pc <- 8 #2 mut_thresh <- floor(mut_pc * length(pc_celllines) / 100) CN_Thr <- 1 # Threshold to choose only deep deletions # Binary mutation data mut_mat <- slidr::prepareMutMat(x = NULL, driver_genes = driver_genes, samples = pc_celllines, all_cancers_mut_df = all_cancers_mut_df) pc_data$CNalterations <- slidr::prepareCNMat(CN_df = CN_df, samples = pc_celllines, driver_genes = driver_genes, x = NULL) # binarized copy number data with only deep deletions CN_bin <- binarize(2 - pc_data$CNalterations, threshold = CN_Thr) CN_bin[is.na(CN_bin)] <- 0 # Updating copy numbers in mutation matrix pc_data$mutations <- binarize((CN_bin + mut_mat), threshold = 0) # Removing genes with < 8% mutated samples pc_data$mutations <- pc_data$mutations[rowSums(pc_data$mutations) >= mut_thresh, ] # Create a mutation annotation file mut_mat_annot <- melt(pc_data$mutations) mut_mat_annot <- mut_mat_annot %>% dplyr::filter(value != 0) colnames(mut_mat_annot) <- c("Hugo_Symbol","Cell_Line","Mut_Status") # Use copy number info in the mutation annotation file all_cancers_mut_df$Tumor_Sample_Barcode <- tolower(all_cancers_mut_df$Tumor_Sample_Barcode) CN_alterations_df <- melt(pc_data$CNalterations) colnames(CN_alterations_df) <- c("Hugo_Symbol","Cell_Line","CN_Value") # Annotating mutations and adding copy number information pc_data$mutation_annot <- left_join(mut_mat_annot,all_cancers_mut_df, by = c("Hugo_Symbol" = "Hugo_Symbol","Cell_Line" = "Tumor_Sample_Barcode")) %>% dplyr::select(Hugo_Symbol, Cell_Line, Mut_Status, Variant_Classification) %>% dplyr::group_by(Hugo_Symbol,Cell_Line,Mut_Status) %>% dplyr::summarise(Variant_Classification = paste(Variant_Classification, collapse=";")) pc_data$mutation_annot <- left_join(pc_data$mutation_annot, CN_alterations_df, by = c("Hugo_Symbol","Cell_Line")) # Replacing all the na pc_data$mutation_annot[is.na(pc_data$mutation_annot)] <- 0 save.image("~/Downloads/Slidr_Results_new/pancan.Rdata")
Identify Synthetic lethals from pan cancer data.
# Path for results path_results <- "~/Downloads/Slidr_Results_new/" hits_pancan <- slidr::identifySLHits(canc_data = pc_data, path_results = path_results, WT_pval_thresh = 0, fp_thresh = 1)
Plotting p-values of the SL pairs and identifying partners that are partners for many drivers.
hits_pancan <- read.delim("~/Downloads/Slidr_Results/Pan_cancer_8pc/Hit_List/SL_hits_pan_cancer.txt",stringsAsFactors = F) hits_pancan$sl_partner_gene <- sapply(hits_pancan$sl_partner_gene, function(x){strsplit(x, ",")[[1]][1]}) p1 <- ggplot(hits_pancan, aes(x=driver_gene, y=sl_partner_gene, size = -log(mut_pvalue, base = 10))) + geom_point(fill = "#055C95",color="#055C95", alpha=0.45) + scale_size_continuous(range=c(2, 10)) + theme_bw() + xlab("Driver genes") + ylab("SL partner genes") + labs(size = "-log(p-value)")+ theme(panel.grid.major = element_line(size = 0.1), panel.grid.minor = element_blank(), panel.border = element_blank(), axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), axis.text.y=element_text(angle=0, size=11,colour="#525252"), axis.text.x=element_text(angle=90, size=11,vjust=0.5 ,hjust=1, colour="#525252"), axis.title.x=element_text(angle=0, size=20,colour="#525252"), axis.title.y=element_text(angle=90, size=20,colour="#525252"), legend.position="bottom", legend.title = element_text(angle=0, size=13,colour="#525252"), legend.text = element_text(size=13,colour="#525252"), legend.key.size = unit(0.75, "cm")) ggsave(filename = "~/Downloads/Slidr_Results/Pan_cancer_8pc/SLpairs_8pc_bubblePlot.pdf", p1, width = 18, height = 17, units = "in")
Causal inference
set.seed(478) hits_pancan <- read.delim("~/Downloads/Slidr_Results/Pan_cancer_8pc/Hit_List/SL_hits_pan_cancer.txt",stringsAsFactors = F) load("~/Downloads/Slidr_Results/Pan_cancer_8pc/pancan.Rdata") causal_hits <- data.frame("driver_gene" = character(), "sl_partner_gene" = character(), "pval" = numeric()) grouped_hits <- hits_pancan %>% dplyr::group_by(sl_partner_gene) %>% summarise(drivers = paste(driver_gene, collapse = ",")) # set the caliper for matching set_cal = 0.1 for(i in 1:nrow(grouped_hits)){ temp_drivers <- sapply(grouped_hits$drivers[i], function(x){strsplit(x,",")[[1]]}) if(length(temp_drivers) >= 2){ # running for two or more genes # defining the causal df sl_gene <- grouped_hits$sl_partner_gene[i] causal_df <- t(pc_data$mutations[temp_drivers,]) causal_df <- cbind.data.frame(t(pc_data$viabilities[sl_gene,rownames(causal_df)]), causal_df) # To avoid confusion between driver and same gene as target colnames(causal_df)[1] <- "Viability" causal_df[,"Viability"] <- 1 - binarize(causal_df[,"Viability"], threshold = median(causal_df[,"Viability"])) colnames(causal_df) <- gsub('-', "_", colnames(causal_df)) temp_drivers <- gsub('-', "_", temp_drivers) for(j in 1: length(temp_drivers)){ # t_res <- NULL t_res <- data.frame("smd_sum" = numeric(), "p_val" = numeric()) for(k in 1:50){ # Shuffling the rows for matching temp_df <- causal_df[sample(1:nrow(causal_df)),] # values of viabilities and treatment var y <- temp_df$Viability tr_gene <- temp_drivers[j] # covariates xvars <- temp_drivers[-j] table1 <- CreateTableOne(vars = xvars, strata = tr_gene, data = temp_df, test = FALSE) #print(table1,smd=TRUE) # Propensity matching psmodel <- glm(as.formula(paste0(tr_gene," ~" ,paste(xvars, collapse = " + "))), family = binomial(), data = temp_df) #create propensity score pscore <- psmodel$fitted.values #do greedy matching on logit(PS) using Match with a caliper logit <- function(p) {log(p)-log(1-p)} err_mes <- try(Match(Tr = temp_df[,tr_gene], M = 1, X = logit(pscore), replace = FALSE, caliper = set_cal, estimand = "ATT"), silent = FALSE) if(is.na(err_mes[[1]])){ # t_res <- c(t_res,NaN) t_res <- rbind.data.frame(t_res, cbind.data.frame(smd_sum = NaN, p_val = NaN)) }else{ psmatch <- Match(Tr = temp_df[,tr_gene], M = 1, X = logit(pscore), replace = FALSE, caliper = set_cal, estimand = "ATT") matched <- temp_df[unlist(psmatch[c("index.treated","index.control")]), ] #get standardized differences matchedtab1 <- CreateTableOne(vars=xvars, strata =tr_gene, data=matched, test = FALSE) #print(matchedtab1, smd = TRUE) temp_smd_sum <- sum(ExtractSmd(matchedtab1)) #outcome analysis y_trt <- matched$Viability[matched[,tr_gene] == 1] y_con <- matched$Viability[matched[,tr_gene] == 0] #pairwise difference diff_y <- y_trt - y_con # paired t-test if(class(try(t.test(diff_y), silent = TRUE)) == "try-error"){ t_res <- rbind.data.frame(t_res, cbind.data.frame(smd_sum = temp_smd_sum, p_val = NaN)) }else t_res <- rbind.data.frame(t_res, cbind.data.frame(smd_sum = temp_smd_sum, p_val = t.test(diff_y)$p.value)) } # t_res <- t_res[!is.nan(t_res)] # print(length(t_res)) # print(pchisq(-2 * sum(log(t_res)), # df = 2 * length(t_res), # lower.tail=FALSE)) # comb_pval <- pchisq(-2 * sum(log(t_res)), # df = 2 * length(t_res), # lower.tail = FALSE) # } # colnames(t_res) <- c("smd_sum", "p_val") t_res <- t_res[!is.nan(t_res$p_val),] # causal_hits <- rbind.data.frame(causal_hits, # cbind.data.frame(tr_gene, sl_gene, comb_pval)) # if all viabilities are equal then p-value will be NaN and hence we set the value to 1 if(nrow(t_res) != 0){ # setting p-val to median p-val when you have multiple samples with same min smd if(sum(t_res$smd_sum == min(t_res$smd_sum)) > 1){ fin_pval <- t_res %>% dplyr:: filter(smd_sum == min(t_res$smd_sum)) %>% dplyr::summarize(x = mean(p_val)) }else fin_pval <- t_res$p_val[which.min(t_res$smd_sum)] }else{ fin_pval <- NaN } causal_hits <- rbind.data.frame(causal_hits, cbind.data.frame(driver_gene = tr_gene, sl_partner_gene = sl_gene, pval = as.numeric(fin_pval))) } } } # retaining pairs for which true causal hit exists best_causal_hits <- causal_hits %>% dplyr::group_by(sl_partner_gene) %>% dplyr::slice(which.min(pval)) # filtering out the pairs for which true causal hit exists insig_causal_hits <- causal_hits %>% dplyr::group_by(sl_partner_gene) %>% dplyr::slice(-which.min(pval)) # Remove the insignificant causal pairs from the total pancancer hits filt_hits_pancan <- hits_pancan %>% dplyr::anti_join(insig_causal_hits, by = c("driver_gene", "sl_partner_gene"))
Plotting p-values for a given pair across different cancers.
# hits_pancan <- read.delim("~/Downloads/Slidr_Results/Pan_cancer_8pc/Hit_List/SL_hits_pan_cancer.txt",stringsAsFactors = F) # # load("~/Downloads/Slidr_Results/Pan_cancer_8pc/pancan.Rdata") load("~/Downloads/Slidr_Results/Pan_cancer_8pc/causal_res.RData") original_hits_pancan <- hits_pancan hits_pancan <- filt_hits_pancan # create data frame for each pair with frequency of different celllines canc_type_df <- do.call(rbind.data.frame, lapply(1:nrow(hits_pancan), function(x){ mutations <- pc_data$mutations mut_celllines <- names(which(mutations[hits_pancan$driver_gene[x],] == 1)) cancer_types <- sapply(mut_celllines, function(x){paste(strsplit(x,"_")[[1]][-1], collapse = "_")}) cancer_types <- as.data.frame(table(cancer_types)) data.frame(hits_pancan$driver_gene[x], hits_pancan$sl_partner_gene[x], cancer_types) #} })) colnames(canc_type_df) <- c("driver_gene", "sl_partner_gene", "canc_type", "n_samples") min_Nmut = 2 # get the p-value for each pair and type of cancer pval_df <- do.call(rbind.data.frame, lapply(1:nrow(canc_type_df), function(x){ canc_type <- as.character(canc_type_df$canc_type[x]) driver <- as.character(canc_type_df$driver_gene[x]) sl_partner <- as.character(canc_type_df$sl_partner_gene[x]) celllines <- grep(canc_type,colnames(pc_data$mutations)) mut_samples <- sum(pc_data$mutations[driver,celllines]) if(mut_samples >= min_Nmut & (length(celllines) - mut_samples) >= min_Nmut){ temp <- pc_data temp$mutations <- pc_data$mutations[,celllines] temp$viabilities <- pc_data$viabilities[,celllines] c(canc_type,slidr::getPval(temp,driver,sl_partner)) }else c(canc_type,driver,sl_partner,1,1) })) colnames(pval_df) <- c("canc_type","driver_gene", "sl_partner_gene", "WT_pvalue", "mut_pvalue") summary_df <- left_join(canc_type_df, pval_df, by = c("driver_gene","sl_partner_gene","canc_type")) summary_df$sl_partner_gene <- sapply(summary_df$sl_partner_gene, function(x){strsplit(x, ",")[[1]][1]}) summary_df <- cbind(paste(summary_df$driver_gene,summary_df$sl_partner_gene, sep = "_"), summary_df, rowSums(pc_data$mutations[summary_df$driver_gene,])) colnames(summary_df)[c(1,8)] <- c("sl_pairs","tot_samples") summary_df <- summary_df %>% dplyr::filter(mut_pvalue != 1 & WT_pvalue != 1) # Using bubble plot to summarize the p-values in each cancer type q1 <- ggplot(summary_df, aes(y=canc_type, x=sl_pairs, size = -log(as.numeric(as.character(mut_pvalue)), base = 10))) + geom_point(fill = "#055C95",color="#055C95", alpha=0.45) + scale_size_continuous(range=c(2, 8)) + theme_bw() + ylab("Primary sites") + xlab("Driver and SL partner genes") + labs(size = "-log(p-value)")+ theme(panel.grid.major = element_line(size = 0.1), panel.grid.minor = element_blank(), panel.border = element_blank(), axis.ticks.x = element_blank(), axis.ticks.y = element_blank(), axis.text.x = element_text(angle = 90, vjust=0.5, hjust=1, size=13,colour="#525252"), axis.text.y = element_text(angle = 0, size=14, vjust=0.5, hjust=1, colour="#525252"), axis.title.x=element_text(angle = 0, size=20,colour="#525252"), axis.title.y=element_text(angle=90, size=20,colour="#525252"), legend.position="bottom", legend.title = element_text(angle=0, size=15,colour="#525252"), legend.text = element_text(size=13,colour="#525252"), legend.key.size = unit(0.75, "cm")) + scale_y_discrete(labels = c("Autonomic ganglia", "Bone", "Breast", "CNS", "Endometrium", "Blood", "Kidney", "Large intestine", "Liver", "Lung", "Oesophagus", "Ovary", "Pancreas", "Skin", "Soft tissue", "Stomach", "UADT", "Urinary tract") )# No Pleura after causal filtering # Using a heatmap to describe the p-values in each cancer type q2 <- ggplot(summary_df, aes(x = sl_pairs,y = canc_type)) + geom_tile(aes(fill = -log(as.numeric(as.character(mut_pvalue))))) + scale_fill_viridis(option="D", begin = 0, end = 1, alpha = 0.9) + theme_bw() + ylab("Primary sites") + xlab("Driver and SL partner genes") + labs(fill = "-log(p-value)")+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(angle = 90, vjust=0.5, hjust=1, size=13,colour="#525252"), axis.text.y = element_text(angle = 0, size=14, vjust=0.5, hjust=1, colour="#525252"), axis.title.x=element_text(angle = 0, size=20,colour="#525252"), axis.title.y=element_text(angle=90, size=20,colour="#525252"), legend.position="bottom", legend.title = element_text(angle=0, size=15,colour="#525252"), legend.text = element_text(size=13,colour="#525252"), legend.key.size = unit(0.9, "cm"))+ #panel.border = element_blank()) + scale_y_discrete(labels = c("Autonomic ganglia", "Bone", "Breast", "CNS", "Endometrium", "Blood", "Kidney", "Large intestine", "Liver", "Lung", "Oesophagus", "Ovary", "Pancreas", "Skin", "Soft tissue", "Stomach", "UADT", "Urinary tract") ) # No Pleura after causal filtering ggsave(filename = "~/Downloads/Slidr_Results/Pan_cancer_8pc/SLpairs_canctype_8pc_bubblePlot.pdf", q1, width = 35, height = 9, units = "in") ggsave(filename = "~/Downloads/Slidr_Results/Pan_cancer_8pc/SLpairs_canctype_8pc_heatmap.pdf", q2, width = 35, height = 9, units = "in")
Plotting stacked bar plot for number of samples.
barplot_df <- summary_df %>% dplyr::distinct(driver_gene, n_samples, canc_type, tot_samples) barplot_df$canc_type <- factor(barplot_df$canc_type, labels = c("Autonomic ganglia", "Bone", "Breast", "CNS", "Endometrium", "Blood", "Kidney", "Large intestine", "Liver", "Lung", "Oesophagus", "Ovary", "Pancreas", "Pleura", "Skin", "Soft tissue", "Stomach", "UADT", "Urinary tract")) coloursSites <- c("#CC99BB", "#AA4488", "#771155", "#77AADD", "#4477AA", "#114477", "#77CCCC", "#44AAAA", "#117777", "#88CCAA", "#44AA77", "#117744", "#DDDD77", "#AAAA44", "#777711", "#DDAA77", "#AA7744", "#774411", "#DD7788") p2 <- ggplot(barplot_df, aes(fill = canc_type, x = reorder(driver_gene,tot_samples), y = n_samples)) + geom_bar(stat = "identity") + theme_bw() + ylab("Number of mutated cell lines") + xlab("Driver genes") + labs(fill = "Primary site")+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_blank(), axis.line = element_line(colour = "#525252"), axis.text.x = element_text(angle = 90, vjust=0.4, hjust=1, size=11,colour="#525252"), axis.text.y = element_text(angle = 0, size=11, vjust=1, hjust=1, colour="#525252"), axis.title.x=element_text(angle = 0, size=16,colour="#525252"), axis.title.y=element_text(angle=90, size=16,colour="#525252"), legend.position="right", legend.title = element_text(angle=0, size=13,colour="#525252"), legend.text = element_text(size=12,colour="#525252"), legend.key.size = unit(0.75, "cm"))+ scale_fill_manual(values = coloursSites)+ scale_y_continuous(breaks = seq(0, 280, by = 40)) ggsave("~/Downloads/Slidr_Results/Pan_cancer_8pc/drivers_canctype_8pc_barplot.pdf", p2, width = 18, height = 8, units = "in")
Group the SL partners and learn the subset structures between mutated genes. We use the nem
package for this.
grouped_hits <- hits_pancan %>% dplyr::group_by(sl_partner_gene) %>% summarise(drivers = paste(driver_gene, collapse = ",")) for(i in 1:nrow(grouped_hits)){ temp_drivers <- sapply(grouped_hits$drivers[i], function(x){strsplit(x,",")[[1]]}) if(length(temp_drivers) >= 2){ # running for two or more genes D <- t(pc_data$mutations[temp_drivers,]) control <- set.default.parameters(unique(colnames(D)),type="mLL",pcombi = TRUE, trans.close=TRUE, para = c(1e-4,1e-4)) temp_res <- nem(D, inference = "ModuleNetwork", control=control, verbose=FALSE) temp_adj_mat <- transitive.reduction(as(temp_res$graph, "matrix")) pdf(file = paste(path_results, "Subset_plots/", grouped_hits$sl_partner_gene[i], ".pdf", sep = "")) plot(as(temp_adj_mat, "graphNEL"), attrs=list(node=list(shape = "ellipse", color="olivedrab3", fontcolor="deepskyblue4", fontsize=12), edge=list(color="gray56", arrowhead = "normal")), main= grouped_hits$sl_partner_gene[i], col.main = "gray56") dev.off() } }
Trying other models like IPTW for causality.
set.seed(478) hits_pancan <- read.delim("~/Downloads/Slidr_Results/Pan_cancer_8pc/Hit_List/SL_hits_pan_cancer.txt",stringsAsFactors = F) load("~/Downloads/Slidr_Results/Pan_cancer_8pc/pancan.Rdata") grouped_hits <- hits_pancan %>% dplyr::group_by(sl_partner_gene) %>% summarise(drivers = paste(driver_gene, collapse = ",")) for(i in 1:nrow(grouped_hits)){ temp_drivers <- sapply(grouped_hits$drivers[i], function(x){strsplit(x,",")[[1]]}) if(length(temp_drivers) >= 2){ # running for two or more genes # defining the causal df sl_gene <- grouped_hits$sl_partner_gene[i] causal_df <- t(pc_data$mutations[temp_drivers,]) causal_df <- cbind.data.frame(t(pc_data$viabilities[sl_gene,rownames(causal_df)]), causal_df) # To avoid confusion between driver and same gene as target colnames(causal_df)[1] <- "Viability" causal_df[,"Viability"] <- 1 - binarize(causal_df[,"Viability"], threshold = median(causal_df[,"Viability"])) colnames(causal_df) <- gsub('-', "_", colnames(causal_df)) temp_drivers <- gsub('-', "_", temp_drivers) for(j in 1: length(temp_drivers)){ # values of viabilities and treatment var y <- causal_df$Viability tr_gene <- temp_drivers[j] treatment <- causal_df[,tr_gene] # covariates xvars <- temp_drivers[-j] table1 <- CreateTableOne(vars = xvars, strata = tr_gene, data = causal_df, test = FALSE) print(table1,smd=TRUE) # Propensity matching psmodel <- glm(as.formula(paste0(tr_gene," ~" ,paste(xvars, collapse = " + "))), family = binomial(link ="logit"), data = causal_df) ## value of propensity score for each subject ps <- predict(psmodel, type = "response") #create weights weight <- ifelse(treatment==1,1/(ps),1/(1-ps)) truncweight <- replace(weight,weight>600,600) #apply weights to data weighteddata <- svydesign(ids = ~ 1, data = causal_df, weights = ~ truncweight) #weighted table 1 weightedtable <- svyCreateTableOne(vars = xvars, strata = tr_gene, data = weighteddata, test = FALSE) ## Show table with SMD print(weightedtable, smd = TRUE) #get causal risk difference glm.obj <- glm(formula(paste0("Viability ~", tr_gene)), weights = truncweight, family = quasibinomial(link="identity"), data = causal_df) #summary(glm.obj) betaiptw <- coef(glm.obj) SE <- sqrt(diag(vcovHC(glm.obj, type="HC0"))) causalrd <- (betaiptw[2]) lcl <- (betaiptw[2]-1.96*SE[2]) ucl <- (betaiptw[2]+1.96*SE[2]) c(lcl,causalrd,ucl) z <- betaiptw[2]/SE[2] exp((-0.717 * z) - (0.416 * z^2)) } # #McNemar test # mc_res <- mcnemar.test(as.matrix(table(y_trt,y_con))) # mc_res$p.value }} # fit propensity score model to get weights, but truncated weightmodel<-ipwpoint(exposure= IFNE , family = "binomial", link ="logit", denominator= ~ MTAP + IFNA1 + IFNA13 + IFNA8 + IFNA17 + IFNW1 + IFNA14 + IFNA21 + IFNA16 + IFNA2 + IFNA7 + PTPLAD2 + IFNA4 + IFNB1 + IFNA5 + DMRTA1 + CDKN2B + IFNA6 + IFNA10 + KLHL9, data = causal_df, trunc = 0) weighteddata <- svydesign(ids = ~ 1, data = causal_df, weights = ~ weightmodel$weights.trun) weightedtable <- svyCreateTableOne(vars = xvars, strata = tr_gene, data = weighteddata, test = FALSE) print(weightedtable, smd = TRUE) #numeric summary of weights summary(weightmodel$weights.trun) #plot of weights ipwplot(weights = weightmodel$weights.trun, logscale = FALSE, main = "weights") causal_df$wt<-weightmodel$weights.trun #fit a marginal structural model (risk difference) msm <- (svyglm(Viability ~ IFNE, design = svydesign(~ 1, weights = ~wt, data =causal_df))) coef(msm) confint(msm) se <- (confint(msm)[2,2] - confint(msm)[2,1])/(2*1.96) z <- coef(msm)[2]/se exp((-0.717 * z) - (0.416 * z^2))
Comparing the tp53 pan-cancer hits after dividing samples based on hot-spots and truncating mutations
path_results <- "~/Downloads/Slidr_Results/" set.seed(58972) hits_pancan <- read.delim("~/Downloads/Slidr_Results/Pan_cancer_8pc/Hit_List/SL_hits_pan_cancer.txt",stringsAsFactors = F) load("~/Downloads/Slidr_Results/Pan_cancer_8pc/pancan.Rdata") p53_missense_cl <- pc_data$mutation_annot %>% dplyr::filter(Hugo_Symbol == "TP53" & Variant_Classification == "Missense_Mutation") %>% dplyr::ungroup() %>% dplyr::select(Cell_Line) %>% t() p53_truncate_cl <- pc_data$mutation_annot %>% dplyr::filter(Hugo_Symbol == "TP53" & Variant_Classification != "Missense_Mutation") %>% dplyr::ungroup() %>% dplyr::select(Cell_Line) %>% t() p53_missense_data <- list() p53_missense_data$viabilities <- dplyr::select(pc_data$viabilities, -p53_truncate_cl) # p53_missense_data$mutations <- t(pc_data$mutations["TP53",which(!colnames(pc_data$mutations) %in% p53_truncate_cl)]) # rownames(p53_missense_data$mutations) <- "TP53" # p53_missense_data$CNalterations <- t(pc_data$CNalterations["TP53",which(!colnames(pc_data$CNalterations) %in% p53_truncate_cl)]) # rownames(p53_missense_data$CNalterations) <- "TP53" p53_missense_data$mutations <- pc_data$mutations[,which(!colnames(pc_data$mutations) %in% p53_truncate_cl)] p53_missense_data$CNalterations <- pc_data$CNalterations[,which(!colnames(pc_data$CNalterations) %in% p53_truncate_cl)] # p53_missense_data$mutation_annot <- pc_data$mutation_annot %>% # dplyr::filter(Hugo_Symbol == "TP53" & Variant_Classification == "Missense_Mutation") p53_missense_data$mutation_annot <- pc_data$mutation_annot p53_missense_data$primary_site <- "tp53_missense" p53_truncate_data <- list() p53_truncate_data$viabilities <- dplyr::select(pc_data$viabilities, -p53_missense_cl) # p53_truncate_data$mutations <- t(pc_data$mutations["TP53",which(!colnames(pc_data$mutations) %in% p53_missense_cl)]) # rownames(p53_truncate_data$mutations) <- "TP53" # p53_truncate_data$CNalterations <- t(pc_data$CNalterations["TP53",which(!colnames(pc_data$CNalterations) %in% p53_missense_cl)]) # rownames(p53_truncate_data$CNalterations) <- "TP53" p53_truncate_data$mutations <- pc_data$mutations[,which(!colnames(pc_data$mutations) %in% p53_missense_cl)] p53_truncate_data$CNalterations <- pc_data$CNalterations[,which(!colnames(pc_data$CNalterations) %in% p53_missense_cl)] # p53_truncate_data$mutation_annot <- pc_data$mutation_annot %>% # dplyr::filter(Hugo_Symbol == "TP53" & Variant_Classification != "Missense_Mutation") p53_truncate_data$mutation_annot <- pc_data$mutation_annot p53_truncate_data$primary_site <- "tp53_truncate" unlist(lapply(p53_missense_data, function(x){print(dim(x))})) unlist(lapply(p53_truncate_data, function(x){print(dim(x))})) hits_pancan_missense <- slidr::identifySLHits(canc_data = p53_missense_data, path_results = path_results, WT_pval_thresh = 0, fp_thresh = 1) hits_pancan_truncate <- slidr::identifySLHits(canc_data = p53_truncate_data, path_results = path_results, WT_pval_thresh = 0, fp_thresh = 1) hits_list <- list(All = hits_pancan$sl_partner_gene[hits_pancan$driver_gene == "TP53"], Missense = hits_pancan_missense$sl_partner_gene[hits_pancan_missense$driver_gene == "TP53"], Truncated = hits_pancan_truncate$sl_partner_gene[hits_pancan_truncate$driver_gene == "TP53"]) hits_list <- lapply(hits_list, function(x){x[x != "TP53"]}) cairo_pdf("~/Downloads/Slidr_Results/TP53/upset_hits.pdf", width = 6, height = 4) upset(fromList(hits_list), nsets = 3, empty.intersections = "on", matrix.color = "#066CAE", matrix.dot.alpha = 0.6, mb.ratio = c(0.7, 0.3), main.bar.color = "#2D2C2C", # mainbar.y.label = "Intersection Size", order.by = "freq", point.size = 3, sets.bar.color = "#2D2C2C", # sets.x.label = "Number of hits", text.scale = 1.3) dev.off()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.