R/support-instancer.R

# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
# vi: set ts=2 noet:
#
# (c) Copyright Rosetta Commons Member Institutions.
# (c) This file is part of the Rosetta software suite and is made available under license.
# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
# (c) For more information, see http://www.rosettacommons.org. Questions about this can be
# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.




#'  prepare instances to be viewed in PyMol script to look at feature instances
#'  The residues_of_interest should be a data.frame with the following columns:
#'    tag, id, chain, resNum, atom
#'  See the error message below for more details
#'
#'  Run by 'pymol instances.pml' in the distances directory.
#' @export
prepare_feature_instances <- function(
	feature_instances_id,
	sample_sources,
	feature_atoms,
	output_dir){

	required_fields <- c("sample_source", "struct_id", "id", "chain", "resNum", "atom")
	if(!(all(required_fields %in% names(feature_atoms)))){
		stop(paste("\n",
			"For feature_instances_id: ", feature_instances_id, "\n",
			"\n",
			"Attempting to visualize feature instances, however the feature_atoms\n",
			"table does not contain enough information to identify feature instances.\n",
			"\n",
			"Please have the following columns:\n",
			"	sample_source:\n",
			"		-the sample source id parsed from the name of the feature database:\n",
			"		    feature_<sample_source_id>.db3\n",
			"\n",
			"	struct_id:\n",
			"		-use structures.struct_id\n",
			"		-See https://wiki.rosettacommons.org/index.php/FeaturesDatabaseSchema#StructureFeatures\n",
			"\n",
			"	id:\n",
			"		-Uniquely identify the feature eg use hbond_sites.site_id for hbond sites.\n",
			"\n",
			"	chain, resNum:\n",
			"		-If the residue_pdb_identification table exists use\n",
			"			residue_pdb_identification.chain, residue_pdb_identification.residue_number\n",
			"		-Otherwise use\n",
			"			NULL, residue.resNum\n",
			"\n",
			"The supplied feature_atoms table has the following columns:\n",
			"	", paste(names(feature_atoms), collapse=", "), "\n",
			"The following columns are missing:\n",
			"	", setdiff(required_fields, names(feature_atoms)), "\n", sep = ""))
	}

	full_instances_dir <- file.path(output_dir, "instances", feature_instances_id)

	if(!file.exists(full_instances_dir)){
		dir.create(full_instances_dir, recursive=T)
	}

	file.copy(
		file.path(base_dir, "scripts", "methods", "align_features.py"),
		file.path(full_instances_dir, "align_features.py"))

	plyr::d_ply(feature_atoms, .(sample_source), function(df){
		ss_id <- feature_atoms$sample_source[1]
		cat("extracting pdb structures from sample source '", as.character(ss_id), "'\n", sep="")
		db_fname <- sample_sources[as.character(sample_sources$sample_source) == as.character(ss_id), "fname"]
		extract_structures_to_pdbs(
			db_fname, unique(df$struct_id), file.path(full_instances_dir, ss_id))
	})

	print("done extracting pdbs")

	generate_pymol_script(
		feature_atoms, full_instances_dir)

	cat(
		"Generated ", length(unique(feature_atoms$id)), " feature instances in ",
		length(unique(feature_atoms$struct_id)), " structures.\n",
		"To run:\n",
		"\n",
		"    cd ", full_instances_dir, "\n",
		"    pymol instancer.pml\n", sep ="")
}

extract_structures_to_pdbs <- function(
	features_database_filename,
	struct_ids,
	output_dir) {

	if(!file.exists(file.path(features_database_filename))){
		stop(
			paste("Unable to extract structure from database '",
				features_database_filename, "', because the database does not exist.",
				sep=""))
	}
	if(!file.exists(file.path(output_dir))){
		dir.create(file.path(output_dir), recursive=T)
	}

	if(length(struct_ids) < 1){
		stop(
			paste("Unable to extract structures from database '",
				features_database_filename, "', becasue no structures identifiers were supplied.",
				sep=""))
	}

	# Assume usage of rosetta_database is stored in $ROSETTA3_DB
	command <-paste(
		locate_rosetta_application("format_converter"),
		" -database /home/momeara/rosetta/rosetta/rosetta_database",
		" -in:use_database",
		" -dbms:database_name \"", as.character(features_database_filename), "\"",
		" -in:dbms:struct_ids ", paste(struct_ids, collapse=" "),
		" -out:path:pdb \"", output_dir, "\"",
		" -out:no_nstruct_label",
		" -out:suffix ''",
		" -overwrite",
		" -ignore_unrecognized_res",
		" -run:no_scorefile", sep="")
	cat(command, file=file.path(output_dir, "extract_pdbs.log"), sep="\n")
	cat(command, sep="\n")
	log <- system(command, intern=T)
	cat(log, file=file.path(output_dir, "extract_pdbs.log"), sep="\n", append=TRUE)
}

generate_pymol_script <- function(
	feature_atoms,
	full_instances_dir) {

	#create the pymol pml file
	instancer_pymol_script <- file.path(full_instances_dir, "instancer.pml")

	cat(
		"####### instances.pml #######\n",
		"#\n",
		"# AUTOGENERATED BY rosetta_tests/features/scripts/methods/instancer.R\n",
		"# MODIFY AT YOUR OWN RISK\n",
		"#\n",
		"#\n",
		"# to run:\n",
		"#    cd ", as.character(full_instances_dir), "\n",
		"#    pymol instancer.pml\n",
		"\n",
		"\n",
		"run align_features.py\n",
		"\n",
		sep="", file=instancer_pymol_script, append=F)

	# Align all instances against the first
	first_inst <- NULL

	plyr::d_ply(feature_atoms, .(id), function(df){

		struct_fname <- paste(file.path(df$sample_source[1], df$struct_id[1]),".pdb", sep="")
		obj_id <- paste(df$struct_id[1], df$id[1], sep="_")
		inst_id <- paste("inst", df$id[1], sep="_")
		atoms_sele <- with(df,
			paste("", obj_id, "", chain, resNum, atom, sep="/", collapse=" or "))
		cat(
			"load ", struct_fname, ", ", obj_id, "\n",
			"cmd.select('", inst_id, "', '", atoms_sele, "')\n",
			sep="",	file=instancer_pymol_script, append=T)

		inst <- paste(obj_id, ", ", inst_id, sep="")

		if(is.null(first_inst)){
			first_inst <<- inst
			cat(
				"label ", inst_id, ", name\n",
				"orient ", inst_id, "\n\n", sep="",
				file=instancer_pymol_script, append=T)
		} else {
			cat(
				"align_features ", inst, ", ", first_inst, "\n\n",
				sep="",	file=instancer_pymol_script, append=T)
		}

	})



}
momeara/RosettaFeatures documentation built on May 23, 2019, 6:07 a.m.