Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lanthipeptide and recon multistate design update #263

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ SET_ICOOR 3H4 -60.0 70.0 1.089 C4 C3 C2
#Unpatched H gets its geometry from LOWER, which we have removed

SET_POLYMER_CONNECT LOWER NONE
ADD_PROPERTY LOWER_TERMINUS ## implies terminus

# If modified, no longer canonical.
DELETE_PROPERTY CANONICAL_AA
Expand Down
509 changes: 142 additions & 367 deletions source/src/protocols/cyclic_peptide/crosslinker/lanthionine_util.cc

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,14 @@ namespace protocols {
namespace cyclic_peptide {
namespace crosslinker {

// from M062x_def2TZVP simulation
// from Methionine
constexpr core::Real LANTHIONINE_UTIL_LANTHIONINE_BOND_LENGTH = 1.807;
constexpr core::Real LANTHIONINE_UTIL_LANTHIONINE_BOND_C_ANGLE = 2.011; // 115.230 degrees
constexpr core::Real LANTHIONINE_UTIL_LANTHIONINE_BOND_S_ANGLE = 1.801; // 103.176 degrees
// from MP2/cc-pvtz geometry optimization
constexpr core::Real LANTHIONINE_UTIL_METHYLLANTHIONINE_BOND_CG_ANGLE = 1.864; // 106.790 degrees SCC to CG
constexpr core::Real LANTHIONINE_UTIL_METHYLLANTHIONINE_BOND_CA_ANGLE = 1.963; // 112.460 degrees SCC to CA
constexpr core::Real LANTHIONINE_UTIL_METHYLLANTHIONINE_BOND_CB_ANGLE = 1.927; // 110.440 degrees CA-CB-CG

/// @brief Given a pose and two residues, set up the lanthionine variant types.
/// @details Sidechainres gets SIDECHAIN_CONJUGATION; dalares gets ACETYLATED_NTERMINUS_CONNECTION_VARIANT.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3459,7 +3459,7 @@ SimpleCycpepPredictApplication::add_closebond_logic_lanthipeptide(
debug_assert( cyclization_type() == SCPA_lanthipeptide );
core::Size ala_res = 0;
core::Size cys_res = 0;
if ( pose->residue_type(cyclization_point_start).base_name() == "CYS" ) {
if ( pose->residue_type(cyclization_point_start).base_name() == "CYS" || pose->residue_type(cyclization_point_start).base_name() == "DCYS" ) {
ala_res = cyclization_point_end;
cys_res = cyclization_point_start;
} else {
Expand Down
29 changes: 23 additions & 6 deletions source/src/protocols/recon_design/FindConsensusSequence.cc
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ void FindConsensusSequence::apply( core::pose::Pose & pose ) {
parse_resfiles();

// Get the designable sequences of all poses
utility::vector1< std::string > all_pose_sequences;
utility::vector1< utility::vector1< std::string > > all_pose_sequences;
for ( core::Size ii = 1; ii <= designable_residues_.size(); ++ii ) {
all_pose_sequences.push_back(
get_designable_sequence ( *poses_[ii], designable_residues_[ii] )
Expand Down Expand Up @@ -230,24 +230,41 @@ void FindConsensusSequence::apply_mpi( core::pose::Pose & pose ) {
utility::vector1< core::Size > my_designable_residues = get_designable_residues( pose, this_nodes_resfile );

/// Make a string out of the AAs at my designable positions in the current state
std::string my_sequence = get_designable_sequence ( pose, my_designable_residues );
utility::vector1< std::string > my_sequence = get_designable_sequence ( pose, my_designable_residues );

utility::vector1<std::string> all_pose_sequences (n_procs_);
std::string pass_seq;
for( const std::string& resi_base_name: my_sequence) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rosetta convention is "east const": std::string const &

pass_seq += resi_base_name + " ";
}

utility::vector1< utility::vector1<std::string> > all_pose_sequences (n_procs_);
for ( core::Size ii = 1; ii <= n_procs_; ++ii ) {
if ( rank_ == ii ) {
for ( core::Size jj = 1; jj <= n_procs_; ++jj ) {
if ( rank_!=jj ) utility::send_string_to_node( jj-1, my_sequence ); // node ranks are 0-indexed
if ( rank_!=jj ) utility::send_string_to_node( jj-1, pass_seq ); // node ranks are 0-indexed
else all_pose_sequences[jj] = my_sequence;
}
} else {
all_pose_sequences[ii] = utility::receive_string_from_node( ii-1 ); // node ranks are 0-indexed
//all_pose_sequences[ii] = utility::receive_string_from_node( ii-1 ); // node ranks are 0-indexed
std::string passed_seq;
passed_seq = utility::receive_string_from_node( ii-1 ); // node ranks are 0-indexed
//Need to split passed_seq by spaces
std::istringstream iss(passed_seq);
std::string resi_name;
while (iss >> resi_name){
all_pose_sequences[ii].push_back(resi_name);
}
}
}

if ( debug_ ) {
TR << "printing all AAs at designable positions to check indices: "<< std::endl;
for ( core::Size i = 1; i <= all_pose_sequences.size(); ++i ) {
TR << i << ":" << all_pose_sequences[ i ] << ", ";
TR << i << ":";
for (const std::string& resi : all_pose_sequences[i]) {
TR << resi << "-";
}
TR << ", ";
}
TR << std::endl;
}
Expand Down
35 changes: 26 additions & 9 deletions source/src/protocols/recon_design/MSDMover.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@

#include <protocols/minimization_packing/PackRotamersMover.hh>

#include <sstream>
#include <utility/string_util.hh>
#include <utility/mpi_util.hh>

Expand Down Expand Up @@ -168,7 +169,7 @@ MSDMover::apply( core::pose::Pose & pose ) {
setup_mover( pose );

// Find the sequence of the other poses
utility::vector1< std::string > other_pose_sequences;
utility::vector1< utility::vector1< std::string > > other_pose_sequences;
for ( core::Size ii = 1; ii <= poses_.size(); ++ii ) {
if ( ii != current_pose_ ) {
other_pose_sequences.push_back(
Expand Down Expand Up @@ -217,7 +218,7 @@ MSDMover::apply( core::pose::Pose & pose ) {
/// applied so they can be removed later
utility::vector1< core::scoring::constraints::ConstraintCOP >
MSDMover::apply_linked_constraints( core::pose::Pose & pose,
utility::vector1< std::string > other_pose_sequences,
utility::vector1< utility::vector1< std::string > > other_pose_sequences,
utility::vector1< core::Size > my_designable_residues ) {

using namespace core::scoring::constraints;
Expand Down Expand Up @@ -280,34 +281,50 @@ MSDMover::apply_mpi( core::pose::Pose & pose ) {
/// Find out my designable residues from my pose and my resfile
utility::vector1< core::Size > my_designable_residues = get_designable_residues( pose, this_nodes_resfile );

/// Make a string out of the AAs at my designable positions in the current state
std::string my_sequence = get_designable_sequence ( pose, my_designable_residues );
/// Make a string vector out of the AAs at my designable positions in the current state
utility::vector1< std::string > my_sequence = get_designable_sequence ( pose, my_designable_residues );

std::string pass_seq;
for( const std::string& resi_base_name: my_sequence) {
pass_seq += resi_base_name + " ";
}

/// Get the AAs at designable positions of the other states I need to cooperate with
utility::vector1<std::string> other_pose_sequences( n_procs );
utility::vector1< utility::vector1<std::string> > other_pose_sequences( n_procs );
for ( core::Size ii = 1; ii <= n_procs; ++ii ) {
if ( rank == ii ) {
for ( core::Size jj = 1; jj <= n_procs; ++jj ) {
if ( rank!=jj ) utility::send_string_to_node( jj-1, my_sequence ); // node ranks are 0-indexed
if ( rank!=jj ) utility::send_string_to_node( jj-1, pass_seq ); // node ranks are 0-indexed
else other_pose_sequences[jj] = my_sequence;
}
} else {
other_pose_sequences[ii] = utility::receive_string_from_node( ii-1 ); // node ranks are 0-indexed
std::string passed_seq;
passed_seq = utility::receive_string_from_node( ii-1 ); // node ranks are 0-indexed
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Combine this on a single line. (In general, when possible, try to declare variables on the same line you first initialize them.)

//Need to split passed_seq by spaces
std::istringstream iss(passed_seq);
std::string resi_name;
while (iss >> resi_name){
other_pose_sequences[ii].push_back(resi_name);
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's split()/split_whitespace() functions in utility/string_util.hh which does basically this.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I use that fxn, is there any way to avoid this awkward passing the output to other_pose_sequences:

utility::vector1std::string split_seq = split_whitespace(passed_seq);
for (std::string const &resi_name : split_seq) {
other_pose_sequences[ii].push_back(resi_name);
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You don't have to push it one string at a time; you should be able to set other_pose_sequences[ ii ] directly as the return value from split_whitespace.

}
}

/// Let the master make sure all the sequences are the same length,
/// i.e. all the states have same number of designable residues
if ( master ) {
for ( std::string const & sequence: other_pose_sequences ) {
for ( utility::vector1< std::string > const & sequence: other_pose_sequences ) {
if ( sequence.size() != my_sequence.size() ) {
utility_exit_with_message( "Error: all states must have the same number of designable residues" );
}
}
}

if ( debug_ ) {
TR << "my sequence is " << my_sequence << std::endl;
TR << "my sequence is ";
for( core::Size i = 1; i <= my_sequence.size(); ++i ) {
TR << my_sequence[i] << " ";
}
TR << std::endl;
}

/// Run my multistate design
Expand Down
2 changes: 1 addition & 1 deletion source/src/protocols/recon_design/MSDMover.hh
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ public:
/// encourage poses to adopt same sequence. Returns a COP of the constraints that were
/// applied so they can be removed later
utility::vector1< core::scoring::constraints::ConstraintCOP >
apply_linked_constraints( Pose & pose, utility::vector1< std::string > other_pose_sequences,
apply_linked_constraints( Pose & pose, utility::vector1< utility::vector1< std::string > > other_pose_sequences,
utility::vector1< core::Size > my_designable_residues );

/// @brief Initialize mover by checking that input poses were passed correctly,
Expand Down
31 changes: 20 additions & 11 deletions source/src/protocols/recon_design/recon_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,31 +45,40 @@ get_designable_residues( core::pose::Pose & pose, std::string resfile ) {

/// Based on a pose and the indices of all designable residues, get a
/// string of all designable AAs concatenated
std::string
utility::vector1< std::string >
get_designable_sequence ( core::pose::Pose & pose, utility::vector1< core::Size > designable_residues ) {
std::string sequence = "";
//std::string sequence = "";
utility::vector1< std::string > sequence;
for ( core::Size seqpos: designable_residues ) {
sequence += pose.residue( seqpos ).name1();
//sequence += pose.residue( seqpos ).name1();
sequence.push_back(pose.residue_type( seqpos ).base_name());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One of the issues you'll run into here is that it looks like ResidueTypeConstraint takes a string which it compares with the name3() of the residue types. In general, the base_name() is not going to be equal to the name3(). (It is equal for standard residues, but isn't necessarily for noncanonicals and other residues which don't have a simple one letter code -> base_name() correspondence.)

(My recommendation is to either use name3() here, or add a mode to ResidueTypeConstraint which works on base_name().)

}
return sequence;
}

/// Based on a list of sequences from poses, get all the AAs present at
/// the position given by position_no
utility::vector1< std::string >
get_candidate_AAs( utility::vector1< std::string > other_pose_sequences,
get_candidate_AAs( utility::vector1< utility::vector1< std::string > > other_pose_sequences,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You probably want to pass unchanged vectors by const & rather than by value -- it saves data copies. (It's a pre-existing issue here, but is exasperated by adding the nesting and larger strings.)

core::Size position_no ) {

utility::vector1< std::string > candidate_AAs;

// Iterate through all poses in my collection and find their AA at this position
for ( std::string const & sequence: other_pose_sequences ) {
char current_AA = sequence[ position_no-1 ]; // string is zero indexed
std::string current_AA_3letter = core::chemical::name_from_aa( core::chemical::aa_from_oneletter_code( current_AA ) );
if ( std::find( candidate_AAs.begin(), candidate_AAs.end(), current_AA_3letter ) == candidate_AAs.end() ) {
candidate_AAs.push_back( current_AA_3letter );
}
}
//for ( std::string const & sequence: other_pose_sequences ) {
// char current_AA = sequence[ position_no-1 ]; // string is zero indexed
// std::string current_AA_3letter = core::chemical::name_from_aa( core::chemical::aa_from_oneletter_code( current_AA ) );
// if ( std::find( candidate_AAs.begin(), candidate_AAs.end(), current_AA_3letter ) == candidate_AAs.end() ) {
// candidate_AAs.push_back( current_AA_3letter );
// }
//}
for ( utility::vector1< std::string > const & sequence: other_pose_sequences ) {
std::string current_AA = sequence[ position_no ]; // vector1 is 1 indexed
//std::string current_AA_3letter = core::chemical::name_from_aa( core::chemical::aa_from_oneletter_code( current_AA ) );
if ( std::find( candidate_AAs.begin(), candidate_AAs.end(), current_AA ) == candidate_AAs.end() ) {
candidate_AAs.push_back( current_AA );
}
}

return candidate_AAs;
}
Expand Down
4 changes: 2 additions & 2 deletions source/src/protocols/recon_design/recon_util.hh
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,13 @@ get_designable_residues( core::pose::Pose & pose, std::string resfile );

/// @brief Based on a pose and the indices of all designable residues, get a
/// string of all designable AAs concatenated
std::string
utility::vector1< std::string >
get_designable_sequence ( core::pose::Pose & pose, utility::vector1< core::Size > designable_residues );

/// @brief Based on a list of sequences from poses, get all the AAs present at
/// the position given by position_no
utility::vector1< std::string > get_candidate_AAs(
utility::vector1< std::string > other_pose_sequences,
utility::vector1< utility::vector1< std::string > > other_pose_sequences,
core::Size position_no );

/// @brief Given a list of poses, find the index of a particular pose
Expand Down
17 changes: 13 additions & 4 deletions source/test/protocols/recon_design/MSDMoverTest.cxxtest.hh
Original file line number Diff line number Diff line change
Expand Up @@ -128,10 +128,19 @@ public:
msd_mover->setup_mover( *pose1 );

// Find the sequence of the other poses
utility::vector1< std::string > other_pose_sequences;
other_pose_sequences.push_back( "A" );
other_pose_sequences.push_back( "C" );
other_pose_sequences.push_back( "D" );
utility::vector1< utility::vector1< std::string > > other_pose_sequences;
//other_pose_sequences.push_back( "A" );
//other_pose_sequences.push_back( "C" );
//other_pose_sequences.push_back( "D" );
utility::vector1< std::string > ala_seq;
ala_seq.push_back("ALA");
other_pose_sequences.push_back(ala_seq);
utility::vector1< std::string > cys_seq;
cys_seq.push_back("CYS");
other_pose_sequences.push_back(cys_seq);
utility::vector1< std::string > asp_seq;
asp_seq.push_back("ASP");
other_pose_sequences.push_back(asp_seq);

utility::vector1< utility::vector1< core::Size > > designable_residues = msd_mover->designable_residues();

Expand Down