####***AddModLabel: add labels by raw_sequence***#########
Add_label = function(i,df,keys){
df_sub = df[keys == unique(keys)[i],]
df_sub$label = i
return(df_sub)
}
###***Sub_raw: modified sequence
Sub_raw = function(raw,df1,df2,raws_user){
user_sub = df1[which(raws_user == raw),]
###---***for one raw in raws***---###
label_sub = df2[with(df2,`Raw file` == raw),]
label_sub$`Raw file` = user_sub$rawfilepath
#browser()
label_sub$`Modified sequence` = gsub('\\(\\w+\\s?\\(\\w\\)\\)|\\(\\w+\\)',
paste0('(',tolower(user_sub$modification),')'),
label_sub$`Modified sequence`)
return(label_sub)
}
Find_cols = function(df,rex){grep(paste0('^',rex,'$'),colnames(df))}
###***AddModLabel:add modification symbols into sequence***####
AddModLabel = function(i,seqs,poss,modlabel){
seq = seqs[i]
pos = as.numeric(poss[[i]])
split = unlist(strsplit(seq,''))
mod.res = split[pos]
split[pos] = paste0(mod.res,modlabel)
modseq = paste(split,collapse = '')
return(modseq)
}
#Get indicies beyond the mw range of user-specified modification and fixed modification
Ind_modtype_err = function(MW,flo,cei,c_flo,c_cei){
mw = as.numeric(MW)
ind_within = which(flo < mw & mw < cei | c_flo < mw & mw < c_cei)
}
#get the mw and position within range of fixed modification
Ind_mod_fix = function(MW,c_flo,c_cei){
mw = as.numeric(MW)
ind_within = which(c_flo < mw & mw < c_cei)
}
#paste mws and positions
Ind_mod_fix1 = function(MW_POS,c_flo,c_cei){
mw = as.numeric(unlist(stringr::str_extract_all(MW_POS,"[^\\(]\\d+\\.?\\d+[^\\)]")))
pos = stringr::str_remove_all(string = unlist(stringr::str_extract_all(MW_POS,'\\(\\d+\\)')),pattern = '[\\(\\)]')
ind_within = which(c_flo < mw & mw < c_cei)
mw_1 = mw[-ind_within]
pos_1 = pos[-ind_within]
pas = paste(paste(mw_1,paste0('(',pos_1,')')),collapse=';')
return(pas)
}
###***According to each row of the user table,get the subset of flattened file and select
###***columns required by mms2plot
Convert_by_usr <- function(i,table){
# i is the row number of table
# table is user_table(dataframe)
#i=1
#user_table = user_table[i,]
user_table = table[i,]
raw_path = user_table$rawfilepath
# raw_path = paste0('inst/extdata/',raw_path)
path_mzid_user = user_table$mzidpath
# path_mzid_user = paste0('inst/extdata/',path_mzid_user)
######******MZID --> flatten file
mzid = mzID::mzID(path_mzid_user)
fla = mzID::flatten(mzid)
fla = fla[with(fla,rank == 1),]#comet,for MSGFPlus,almost every rank was '1'
# browser()
######******filter modification mass shift in flatten file
inv.col = c("[Ss]tart","[Ee]nd","[Pp]re","[Pp]ost","[Aa]ccession","[Dd]escription")
num_cols = unlist(lapply(X = inv.col,FUN = Find_cols,df = fla))
if(length(num_cols) > 0){
fla = fla[!duplicated(fla[,-num_cols]),]
}
# fla = fla[2,]
mod = subset(fla,fla$modified == TRUE)
unmod = subset(fla,fla$modified == FALSE)
######******Extract all of mw and modified positions
if( nrow(mod) > 0 ){
###***range of mw,only for user_table with single row
accu_usr = user_table$mass_shift_tolerance
mw_usr = user_table$mass_shift
mw_floor = as.numeric(mw_usr) - as.numeric(accu_usr)
mw_ceiling = as.numeric(mw_usr) + as.numeric(accu_usr)
fix_c_mw = 57.021464
c_fixed_floor = as.numeric(fix_c_mw) - as.numeric(accu_usr)
c_fixed_ceiling = as.numeric(fix_c_mw) + as.numeric(accu_usr)
mw_pos=mod$modification#mass shift and position
extra_mw = stringr::str_extract_all(mw_pos,"[^\\(]\\d+\\.?\\d+[^\\)]")
extra_pos = mapply(FUN = stringr::str_remove_all,
string = stringr::str_extract_all(mw_pos,'\\(\\d+\\)'),pattern = '[\\(\\)]')
#####***Save mw within range [mw-variance,mw+variance]
#remove mw not within modification specified by user and fixed modification
inds_correct_ls = lapply(FUN = Ind_modtype_err,X = extra_mw,
flo = mw_floor,cei = mw_ceiling,
c_flo = c_fixed_floor,c_cei = c_fixed_ceiling)
count_mw = unlist(lapply(extra_mw,length))
count_correct = unlist(lapply(inds_correct_ls,length))
inds_correct = which(count_mw == count_correct)
mod_user_1 = mod[inds_correct,]#subset within mw range and fixed modification mw range
if( nrow(mod_user_1)>0 ){
mw_pos_1 = mod_user_1$modification
extra_mw_1 = stringr::str_extract_all(mw_pos_1,"[^\\(]\\d+\\.?\\d+[^\\)]")
inds_fix_within = lapply(FUN=Ind_mod_fix,X = extra_mw_1,c_flo = c_fixed_floor,c_cei = c_fixed_ceiling)
count_fix = unlist(lapply(inds_fix_within,length))
inds_fix = which(count_fix != 0)
###^^^
if(length(inds_fix) > 0){
mod_fix_within = mod_user_1[inds_fix,]#fix within
mod_nonfix = mod_user_1[-inds_fix,]#no fix
mw_pos_2 = mod_fix_within$modification
extra_mw_2 = stringr::str_extract_all(mw_pos_2,"[^\\(]\\d+\\.?\\d+[^\\)]")
count_mw_2 = unlist(lapply(extra_mw_2,length))
#When a peptide has only fixed modification,
#the modification column will be changed to NA, modified to FALSE, and merged into the unmod subset
ind_only_fix = which(count_fix[count_fix != 0] == count_mw_2)
if(length(ind_only_fix) > 0){
mod_mul_types = mod_fix_within[-ind_only_fix,]#subset containing fixed and variable modifications simultaneously
mod_only_fix = mod_fix_within[ind_only_fix,]#subset has only fixed modification
mod_only_fix$modified = FALSE
mod_only_fix$modification = NA
mw_pos_clean = unlist(lapply(X=mod_mul_types$modification,c_flo=c_fixed_floor,c_cei=c_fixed_ceiling,FUN=Ind_mod_fix1))
mod_mul_types$modification = mw_pos_clean
mod_1 = rbind(mod_mul_types,mod_nonfix)#merge into mod subset
unmod_1 = rbind(mod_only_fix,unmod)#merge into unmod subset
}else{
mod_mul_types = mod_fix_within
mw_pos_clean = unlist(lapply(X=mod_mul_types$modification,c_flo=c_fixed_floor,c_cei=c_fixed_ceiling,FUN=Ind_mod_fix1))
mod_mul_types$modification = mw_pos_clean
mod_1 = mod_mul_types
unmod_1 = unmod
}
}else{
mod_1 = mod_user_1
unmod_1 = unmod
}
######******Extract mw and modified position from flatten file******######
mw_pos_mod_1 = mod_1$modification
MWs = stringr::str_extract_all(mw_pos_mod_1,"[^\\(]\\d+\\.?\\d+[^\\)]")
POSs = mapply(FUN = stringr::str_remove_all,
string = stringr::str_extract_all(mw_pos_mod_1,'\\(\\d+\\)'),pattern = '[\\(\\)]')
######******Get modified sequence with modification labels such as "(ox)"******######
SEQ = mod_1$pepseq
modlabel = paste0('(',tolower(user_table$modification),')')
Modified_seq = mapply(AddModLabel,seq(nrow(mod_1)),MoreArgs = list(seqs=SEQ,poss=POSs,modlabel=modlabel))#modified sequences
######******Get the dataframe required by mms2plot******######
mod_1$`Modified sequence` = Modified_seq
mod_1$Modifications = user_table$modification
if(nrow(unmod_1) > 0){
unmod_1$`Modified sequence` = unmod_1$pepseq
unmod_1$Modifications = 'Unmodified'
}
bind_mod_unmod = rbind(mod_1,unmod_1)
# bind_mod_unmod$`Modified sequence` = bind_mod_unmod$pepseq
# bind_mod_unmod$Modifications = 'Unmodified'
}else if(nrow(unmod) > 0){
bind_mod_unmod = unmod
bind_mod_unmod$`Modified sequence` = bind_mod_unmod$pepseq
bind_mod_unmod$Modifications = 'Unmodified'
warning('The mass shift exceeds the threshold , please check the input!!')
}else{
stop('In the search results,there are no PSMs with variable modification that you are interested in...Please check the search process!')
}
}else if(nrow(unmod) > 0){
bind_mod_unmod = unmod
bind_mod_unmod$`Modified sequence` = bind_mod_unmod$pepseq
bind_mod_unmod$Modifications = 'Unmodified'
warning('In the search results,there are no PSMs with variable modification that you are interested in...Please check the search process!')
}else{
stop('In the search results,there are no PSMs with variable modification that you are interested in...Please check the search process!')
}
bind_mod_unmod$`Raw file` = raw_path
cols_pick = c('[Pp]roteins?','[Gg]ene.?[Nn]ames?','[Dd]escription','[Aa]ccession')
num_cols = unlist(lapply(X = cols_pick,FUN = Find_cols,df = bind_mod_unmod))
if(length(num_cols) > 0){
sel = bind_mod_unmod[,num_cols]
if(length(num_cols) > 1){
strpaste = apply(X = sel,MARGIN = 1,FUN = paste,collapse = '_')
}else{
strpaste = sel
}
GNs = stringr::str_remove(stringr::str_extract(strpaste,'GN=\\w+'),'GN=')
GNs[which(is.na(GNs))] = stringr::str_extract(strpaste[which(is.na(GNs))],'\\w{2}\\|\\w+\\|\\w+')
GNs[which(is.na(GNs))] = 'Uncharacterized'
}else{GNs = 'Uncharacterized'}
cols_select = c('Raw file','[Aa]cquisitionnum','Modifications','[Pp]epseq',
'Modified sequence','[Cc]hargestate')
num_col = unlist(lapply(X = cols_select,FUN = Find_cols,df = bind_mod_unmod))
df_mms2plot = cbind(dplyr::select(bind_mod_unmod,all_of(num_col)),GNs)
colnames(df_mms2plot) = c('Raw file','Scan number','Modifications','Sequence',
'Modified sequence','Charge','Gene Names')
return(df_mms2plot)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.