Skip to content

Commit

Permalink
v1.5.0
Browse files Browse the repository at this point in the history
  • Loading branch information
martinpacesa authored Dec 30, 2024
1 parent 50c1532 commit de45d16
Show file tree
Hide file tree
Showing 27 changed files with 651 additions and 44 deletions.
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@

Simple binder design pipeline using AlphaFold2 backpropagation, MPNN, and PyRosetta. Select your target and let the script do the rest of the work and finish once you have enough designs to order!

[Preprint link for BindCraft](https://www.biorxiv.org/content/10.1101/2024.09.30.615802v1)
[Take the user experience poll!](https://forms.gle/XsGHDCyHtczVbamPA)

[Preprint link for BindCraft](https://www.biorxiv.org/content/10.1101/2024.09.30.615802)

## Installation
First you need to clone this repository. Replace **[install_folder]** with the path where you want to install it.
Expand Down Expand Up @@ -72,6 +74,8 @@ rm_template_seq_design -> remove target template sequence for design (i
rm_template_seq_predict -> remove target template sequence for reprediction (increases target flexibility)
rm_template_sc_design -> remove sidechains from target template for design
rm_template_sc_predict -> remove sidechains from target template for reprediction
predict_initial_guess -> Introduce bias by providing binder atom positions as a starting point for prediction. Recommended if designs fail after MPNN optimization.
predict_bigbang -> Introduce atom position bias into the structure module for atom initialisation. Recommended if target and design are large (more than 600 amino acids).
# Design iterations
soft_iterations -> number of soft iterations (all amino acids considered at all positions)
Expand Down Expand Up @@ -100,6 +104,7 @@ use_rg_loss -> use radius of gyration loss?
weights_rg -> Design weight - radius of gyration weight for binder
use_termini_distance_loss -> Try to minimise distance between N- and C-terminus of binder? Helpful for grafting
weights_termini_loss -> Design weight - N- and C-terminus distance minimisation weight of binder
cyclize_peptide -> Make the binder/peptide design cyclic
# MPNN settings
mpnn_fix_interface -> whether to fix the interface designed in the starting trajectory
Expand Down
19 changes: 13 additions & 6 deletions bindcraft.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@
####################################
### initialise PyRosetta
pr.init(f'-ignore_unrecognized_res -ignore_zero_occupancy -mute all -holes:dalphaball {advanced_settings["dalphaball_path"]} -corrections::beta_nov16 true -relax:default_repeats 1')
print(f"Running binder design for target {settings_file}")
print(f"Design settings used: {advanced_file}")
print(f"Filtering designs based on {filters_file}")

####################################
# initialise counters
Expand Down Expand Up @@ -119,7 +122,7 @@
print("")

# Proceed if there is no trajectory termination signal
if trajectory.aux["log"]['terminate'] == "":
if trajectory.aux["log"]["terminate"] == "":
# Relax binder to calculate statistics
trajectory_relaxed = os.path.join(design_paths["Trajectory/Relaxed"], design_name + ".pdb")
pr_relax(trajectory_pdb, trajectory_relaxed)
Expand Down Expand Up @@ -194,9 +197,13 @@
clear_mem()
# compile complex prediction model
complex_prediction_model = mk_afdesign_model(protocol="binder", num_recycles=advanced_settings["num_recycles_validation"], data_dir=advanced_settings["af_params_dir"],
use_multimer=multimer_validation)
complex_prediction_model.prep_inputs(pdb_filename=target_settings["starting_pdb"], chain=target_settings["chains"], binder_len=length, rm_target_seq=advanced_settings["rm_template_seq_predict"],
rm_target_sc=advanced_settings["rm_template_sc_predict"])
use_multimer=multimer_validation, use_initial_guess=advanced_settings["predict_initial_guess"], use_initial_atom_pos=advanced_settings["predict_bigbang"])
if advanced_settings["predict_initial_guess"] or advanced_settings["predict_bigbang"]:
complex_prediction_model.prep_inputs(pdb_filename=trajectory_pdb, chain='A', binder_chain='B', binder_len=length, use_binder_template=True, rm_target_seq=advanced_settings["rm_template_seq_predict"],
rm_target_sc=advanced_settings["rm_template_sc_predict"], rm_template_ic=True)
else:
complex_prediction_model.prep_inputs(pdb_filename=target_settings["starting_pdb"], chain=target_settings["chains"], binder_len=length, rm_target_seq=advanced_settings["rm_template_seq_predict"],
rm_target_sc=advanced_settings["rm_template_sc_predict"])

# compile binder monomer prediction model
binder_prediction_model = mk_afdesign_model(protocol="hallucination", use_templates=False, initial_guess=False,
Expand All @@ -221,7 +228,7 @@
save_fasta(mpnn_design_name, mpnn_sequence['seq'], design_paths)

### Predict mpnn redesigned binder complex using masked templates
mpnn_complex_statistics, pass_af2_filters = masked_binder_predict(complex_prediction_model,
mpnn_complex_statistics, pass_af2_filters = predict_binder_complex(complex_prediction_model,
mpnn_sequence['seq'], mpnn_design_name,
target_settings["starting_pdb"], target_settings["chains"],
length, trajectory_pdb, prediction_models, advanced_settings,
Expand Down Expand Up @@ -452,4 +459,4 @@
### Script finished
elapsed_time = time.time() - script_start_time
elapsed_text = f"{'%d hours, %d minutes, %d seconds' % (int(elapsed_time // 3600), int((elapsed_time % 3600) // 60), int(elapsed_time % 60))}"
print("Finished all designs. Script execution for "+str(trajectory_n)+" trajectories took: "+elapsed_text)
print("Finished all designs. Script execution for "+str(trajectory_n)+" trajectories took: "+elapsed_text)
9 changes: 6 additions & 3 deletions bindcraft.slurm
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,11 @@ while true ; do
esac
done

# Ensure that SETTINGS is not empty
if [ -z "$SETTINGS" ]; then
echo "Error: The -s or --settings option is required."
exit 1
fi

echo "Running the BindCraft pipeline"
echo "Running binder design for target ${SETTINGS}"
echo "Design settings used: ${ADVANCED}"
echo "Filtering designs based on ${FILTERS}"
python -u "${SCRIPT_DIR}/bindcraft.py" --settings "${SETTINGS}" --filters "${FILTERS}" --advanced "${ADVANCED}"
2 changes: 1 addition & 1 deletion functions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@
#os.environ["SLURM_STEP_NODELIST"] = os.environ["SLURM_NODELIST"]
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=DeprecationWarning)
warnings.simplefilter(action='ignore', category=BiopythonWarning)
warnings.simplefilter(action='ignore', category=BiopythonWarning)
10 changes: 9 additions & 1 deletion functions/colabdesign_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def binder_hallucination(design_name, starting_pdb, chain, target_hotspot_residu
return af_model

# run prediction for binder with masked template target
def masked_binder_predict(prediction_model, binder_sequence, mpnn_design_name, target_pdb, chain, length, trajectory_pdb, prediction_models, advanced_settings, filters, design_paths, failure_csv, seed=None):
def predict_binder_complex(prediction_model, binder_sequence, mpnn_design_name, target_pdb, chain, length, trajectory_pdb, prediction_models, advanced_settings, filters, design_paths, failure_csv, seed=None):
prediction_stats = {}

# clean sequence
Expand All @@ -246,6 +246,10 @@ def masked_binder_predict(prediction_model, binder_sequence, mpnn_design_name, t
pass_af2_filters = True
filter_failures = {}

if advanced_settings["cyclize_peptide"]:
# make macrocycle peptide
add_cyclic_offset(prediction_model)

# start prediction per AF2 model, 2 are used by default due to masked templates
for model_num in prediction_models:
# check to make sure prediction does not exist already
Expand Down Expand Up @@ -313,6 +317,10 @@ def predict_binder_alone(prediction_model, binder_sequence, mpnn_design_name, le
binder_sequence = re.sub("[^A-Z]", "", binder_sequence.upper())
prediction_model.set_seq(binder_sequence)

if advanced_settings["cyclize_peptide"]:
# make macrocycle peptide
add_cyclic_offset(prediction_model)

# predict each model separately
for model_num in prediction_models:
# check to make sure prediction does not exist already
Expand Down
2 changes: 1 addition & 1 deletion functions/generic_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ def save_fasta(design_name, sequence, design_paths):
def clean_pdb(pdb_file):
# Read the pdb file and filter relevant lines
with open(pdb_file, 'r') as f_in:
relevant_lines = [line for line in f_in if line.startswith(('ATOM', 'HETATM', 'MODEL', 'TER', 'END'))]
relevant_lines = [line for line in f_in if line.startswith(('ATOM', 'HETATM', 'MODEL', 'TER', 'END', 'LINK'))]

# Write the cleaned lines back to the original pdb file
with open(pdb_file, 'w') as f_out:
Expand Down
18 changes: 15 additions & 3 deletions notebooks/BindCraft.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -165,9 +165,11 @@
"# @markdown ---\n",
"# @markdown Which binder design protocol to run? Default is recommended. \"Beta-sheet\" promotes the design of more beta sheeted proteins, but requires more sampling. \"Peptide\" is optimised for helical peptide binders.\n",
"design_protocol = \"Default\" # @param [\"Default\",\"Beta-sheet\",\"Peptide\"]\n",
"# @markdown What interface design method to use?. \"AlphaFold2\" is the default, interface is generated by AlphaFold2. \"MPNN\" uses soluble MPNN to optimise the interface, but majority of residues still originate from AlphaFold2.\n",
"# @markdown What prediction protocol to use? \"Default\" performs single sequence prediction of the binder. \"HardTarget\" uses initial guess to improve complex prediction for difficult targets, but might introduce some bias.\n",
"prediction_protocol = \"Default\" # @param [\"Default\",\"HardTarget\"]\n",
"# @markdown What interface design method to use? \"AlphaFold2\" is the default, interface is generated by AlphaFold2. \"MPNN\" uses soluble MPNN to optimise the interface.\n",
"interface_protocol = \"AlphaFold2\" # @param [\"AlphaFold2\",\"MPNN\"]\n",
"# @markdown What target template protocol to use? \"Default\" allows for limited amount flexibility. \"Masked\" allows for greater target flexibility on both sidechain and backbone level, but might result in reduced experimental success rates.\n",
"# @markdown What target template protocol to use? \"Default\" allows for a limited amount of flexibility. \"Masked\" allows for greater target flexibility on both sidechain and backbone level.\n",
"template_protocol = \"Default\" # @param [\"Default\",\"Masked\"]\n",
"# @markdown ---\n",
"\n",
Expand All @@ -194,6 +196,16 @@
"else:\n",
" raise ValueError(f\"Unsupported template protocol\")\n",
"\n",
"if design_protocol in [\"Peptide\"]:\n",
" prediction_protocol_tag = \"\"\n",
"else:\n",
" if prediction_protocol == \"Default\":\n",
" prediction_protocol_tag = \"\"\n",
" elif prediction_protocol == \"HardTarget\":\n",
" prediction_protocol_tag = \"_hardtarget\"\n",
" else:\n",
" raise ValueError(f\"Unsupported prediction protocol\")\n",
"\n",
"advanced_settings_path = \"/content/bindcraft/settings_advanced/\" + design_protocol_tag + interface_protocol_tag + template_protocol_tag + \".json\"\n",
"\n",
"currenttime = datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n",
Expand All @@ -212,7 +224,7 @@
"#@title Filters\n",
"# @markdown ---\n",
"# @markdown Which filters for designs to use? \"Default\" are recommended, \"Peptide\" are for the design of peptide binders, \"Relaxed\" are more permissive but may result in fewer experimental successes, \"Peptide_Relaxed\" are more permissive filters for non-helical peptides, \"None\" is for benchmarking.\n",
"filter_option = \"Peptide\" # @param [\"Default\", \"Peptide\", \"Relaxed\", \"Peptide_Relaxed\", \"None\"]\n",
"filter_option = \"Default\" # @param [\"Default\", \"Peptide\", \"Relaxed\", \"Peptide_Relaxed\", \"None\"]\n",
"# @markdown ---\n",
"\n",
"if filter_option == \"Default\":\n",
Expand Down
7 changes: 5 additions & 2 deletions settings_advanced/betasheet_4stage_multimer.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
"rm_template_seq_predict": false,
"rm_template_sc_design": false,
"rm_template_sc_predict": false,
"predict_initial_guess": false,
"predict_bigbang": false,
"soft_iterations": 75,
"temporary_iterations": 45,
"hard_iterations": 5,
Expand All @@ -23,7 +25,7 @@
"intra_contact_distance": 14.0,
"inter_contact_distance": 20.0,
"intra_contact_number": 2,
"inter_contact_number": 1,
"inter_contact_number": 2,
"weights_helicity": -2.0,
"random_helicity": false,
"use_i_ptm_loss": true,
Expand All @@ -32,6 +34,7 @@
"weights_rg": 0.3,
"use_termini_distance_loss": false,
"weights_termini_loss": 0.1,
"cyclize_peptide": false,
"enable_mpnn": true,
"mpnn_fix_interface": true,
"num_seqs": 20,
Expand All @@ -57,7 +60,7 @@
"max_trajectories": false,
"enable_rejection_check": true,
"acceptance_rate": 0.01,
"start_monitoring": 200,
"start_monitoring": 600,
"af_params_dir": "",
"dssp_path": "",
"dalphaball_path": ""
Expand Down
7 changes: 5 additions & 2 deletions settings_advanced/betasheet_4stage_multimer_flexible.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
"rm_template_seq_predict": true,
"rm_template_sc_design": false,
"rm_template_sc_predict": false,
"predict_initial_guess": false,
"predict_bigbang": false,
"soft_iterations": 75,
"temporary_iterations": 45,
"hard_iterations": 5,
Expand All @@ -23,7 +25,7 @@
"intra_contact_distance": 14.0,
"inter_contact_distance": 20.0,
"intra_contact_number": 2,
"inter_contact_number": 1,
"inter_contact_number": 2,
"weights_helicity": -2.0,
"random_helicity": false,
"use_i_ptm_loss": true,
Expand All @@ -32,6 +34,7 @@
"weights_rg": 0.3,
"use_termini_distance_loss": false,
"weights_termini_loss": 0.1,
"cyclize_peptide": false,
"enable_mpnn": true,
"mpnn_fix_interface": true,
"num_seqs": 20,
Expand All @@ -57,7 +60,7 @@
"max_trajectories": false,
"enable_rejection_check": true,
"acceptance_rate": 0.01,
"start_monitoring": 200,
"start_monitoring": 600,
"af_params_dir": "",
"dssp_path": "",
"dalphaball_path": ""
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
{
"omit_AAs": "C",
"force_reject_AA": false,
"use_multimer_design": true,
"design_algorithm": "4stage",
"sample_models": true,
"rm_template_seq_design": true,
"rm_template_seq_predict": true,
"rm_template_sc_design": false,
"rm_template_sc_predict": false,
"predict_initial_guess": true,
"predict_bigbang": false,
"soft_iterations": 75,
"temporary_iterations": 45,
"hard_iterations": 5,
"greedy_iterations": 15,
"greedy_percentage": 5,
"save_design_animations": true,
"save_design_trajectory_plots": true,
"weights_plddt": 0.15,
"weights_pae_intra": 0.4,
"weights_pae_inter": 0.1,
"weights_con_intra": 0.4,
"weights_con_inter": 0.5,
"intra_contact_distance": 14.0,
"inter_contact_distance": 20.0,
"intra_contact_number": 2,
"inter_contact_number": 2,
"weights_helicity": -2.0,
"random_helicity": false,
"use_i_ptm_loss": true,
"weights_iptm": 0.05,
"use_rg_loss": true,
"weights_rg": 0.3,
"use_termini_distance_loss": false,
"weights_termini_loss": 0.1,
"cyclize_peptide": false,
"enable_mpnn": true,
"mpnn_fix_interface": true,
"num_seqs": 20,
"max_mpnn_sequences": 2,
"sampling_temp": 0.1,
"backbone_noise": 0.00,
"model_path": "v_48_020",
"mpnn_weights": "soluble",
"save_mpnn_fasta": false,
"num_recycles_design": 1,
"num_recycles_validation": 3,
"optimise_beta": true,
"optimise_beta_extra_soft": 0,
"optimise_beta_extra_temp": 0,
"optimise_beta_recycles_design": 3,
"optimise_beta_recycles_valid": 3,
"remove_unrelaxed_trajectory": true,
"remove_unrelaxed_complex": true,
"remove_binder_monomer": true,
"zip_animations": true,
"zip_plots": true,
"save_trajectory_pickle": false,
"max_trajectories": false,
"enable_rejection_check": true,
"acceptance_rate": 0.01,
"start_monitoring": 600,
"af_params_dir": "",
"dssp_path": "",
"dalphaball_path": ""
}
67 changes: 67 additions & 0 deletions settings_advanced/betasheet_4stage_multimer_hardtarget.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
{
"omit_AAs": "C",
"force_reject_AA": false,
"use_multimer_design": true,
"design_algorithm": "4stage",
"sample_models": true,
"rm_template_seq_design": false,
"rm_template_seq_predict": false,
"rm_template_sc_design": false,
"rm_template_sc_predict": false,
"predict_initial_guess": true,
"predict_bigbang": false,
"soft_iterations": 75,
"temporary_iterations": 45,
"hard_iterations": 5,
"greedy_iterations": 15,
"greedy_percentage": 5,
"save_design_animations": true,
"save_design_trajectory_plots": true,
"weights_plddt": 0.15,
"weights_pae_intra": 0.4,
"weights_pae_inter": 0.1,
"weights_con_intra": 0.4,
"weights_con_inter": 0.5,
"intra_contact_distance": 14.0,
"inter_contact_distance": 20.0,
"intra_contact_number": 2,
"inter_contact_number": 2,
"weights_helicity": -2.0,
"random_helicity": false,
"use_i_ptm_loss": true,
"weights_iptm": 0.05,
"use_rg_loss": true,
"weights_rg": 0.3,
"use_termini_distance_loss": false,
"weights_termini_loss": 0.1,
"cyclize_peptide": false,
"enable_mpnn": true,
"mpnn_fix_interface": true,
"num_seqs": 20,
"max_mpnn_sequences": 2,
"sampling_temp": 0.1,
"backbone_noise": 0.00,
"model_path": "v_48_020",
"mpnn_weights": "soluble",
"save_mpnn_fasta": false,
"num_recycles_design": 1,
"num_recycles_validation": 3,
"optimise_beta": true,
"optimise_beta_extra_soft": 0,
"optimise_beta_extra_temp": 0,
"optimise_beta_recycles_design": 3,
"optimise_beta_recycles_valid": 3,
"remove_unrelaxed_trajectory": true,
"remove_unrelaxed_complex": true,
"remove_binder_monomer": true,
"zip_animations": true,
"zip_plots": true,
"save_trajectory_pickle": false,
"max_trajectories": false,
"enable_rejection_check": true,
"acceptance_rate": 0.01,
"start_monitoring": 600,
"af_params_dir": "",
"dssp_path": "",
"dalphaball_path": ""
}
Loading

0 comments on commit de45d16

Please sign in to comment.