Skip to content

Commit 88e5efa

Browse files
committed
mmseqs gpu support and README addition
1 parent 00de5b4 commit 88e5efa

File tree

2 files changed

+69
-8
lines changed

2 files changed

+69
-8
lines changed

README.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,41 @@ In some cases using precomputed database can still be useful. For the following
108108

109109
If no index was created (`MMSEQS_NO_INDEX=1` was set), then `--db-load-mode` does not do anything and can be ignored.
110110

111+
### Generating MSAs on the GPU
112+
113+
Recently, [GPU-accelerated search for MMseqs2](https://www.biorxiv.org/content/10.1101/2024.11.13.623350v1) was introduced and is now supported in ColabFold. To leverage it, you will need to adjust the database setup and how you run `colabfold_search`.
114+
115+
#### GPU database setup
116+
117+
To set up the GPU databases, you will need to run the `setup_databases.sh` script with `GPU=1`:
118+
119+
```shell
120+
GPU=1 ./setup_databases.sh /path/to/db_folder
121+
```
122+
123+
This will download and set up the GPU databases in the specified folder. Note that we do not set `MMSEQS_NO_INDEX=1` here, because the indices are useful for the GPU search: they will be kept in the GPU memory.
124+
125+
#### GPU search with `colabfold_search`
126+
127+
To run the MSA search on the GPU, it is recommended (although not required) to start a GPU server before running the search; this server will keep the indices in the GPU memory and will be used to accelerate the search. To start a GPU server, run:
128+
129+
```shell
130+
mmseqs gpuserver /path/to/db_folder/colabfold_envdb_202108_db --max-seqs 10000 --db-load-mode 0 --prefilter-mode 1 &
131+
PID1=$!
132+
mmseqs gpuserver /path/to/db_folder/uniref30_2302 --max-seqs 10000 --db-load-mode 0 --prefilter-mode 1 &
133+
PID2=$!
134+
```
135+
136+
By default, this server will use all available GPUs and split the database evenly across them. If you want to restrict the number of GPUs used, you can set the environment variable `CUDA_VISIBLE_DEVICES` to a specific GPU or set of GPUs, e.g., `CUDA_VISIBLE_DEVICES=0,1`. You can control how many sequences are loaded onto the GPU with the `--max-seqs` option. If your database is larger than the available GPU memory, the GPU server will efficiently swap the required data in and out of the GPU memory, overlapping data transfer and computation. The GPU servers are started in the background and will continue to run until you stop them explicitly by killing the processes with `kill $PID1` and `kill $PID2`.
137+
138+
You can then run `colabfold_search` with the `--gpu` and `--gpu-server` options enabled:
139+
140+
```shell
141+
colabfold_search --mmseqs /path/to/bin/mmseqs --gpu 1 --gpu-server 1 input_sequences.fasta /path/to/db_folder msas
142+
```
143+
144+
You can also run the search with only the `--gpu` option enabled if you do not want to start a GPU server, but the GPU server option is generally faster. As with the GPU server, you can control which GPUs are used for the search via the `CUDA_VISIBLE_DEVICES` environment variable.
145+
111146
### Tutorials & Presentations
112147
- ColabFold Tutorial presented at the Boston Protein Design and Modeling Club. [[video]](https://www.youtube.com/watch?v=Rfw7thgGTwI) [[slides]](https://docs.google.com/presentation/d/1mnffk23ev2QMDzGZ5w1skXEadTe54l8-Uei6ACce8eI).
113148

colabfold/mmseqs/search.py

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ def mmseqs_search_monomer(
6464
s: float = 8,
6565
db_load_mode: int = 2,
6666
threads: int = 32,
67+
gpu: int = 0,
68+
gpu_server: int = 0,
6769
unpack: bool = True,
6870
):
6971
"""Run mmseqs with a local colabfold database set
@@ -106,11 +108,16 @@ def mmseqs_search_monomer(
106108
dbSuffix3 = ".idx"
107109

108110
search_param = ["--num-iterations", "3", "--db-load-mode", str(db_load_mode), "-a", "-e", "0.1", "--max-seqs", "10000"]
109-
search_param += ["--prefilter-mode", str(prefilter_mode)]
110-
if s is not None:
111-
search_param += ["-s", "{:.1f}".format(s)]
111+
if gpu:
112+
search_param += ["--gpu", str(gpu), "--prefilter-mode", "1"] # gpu version only supports ungapped prefilter currently
112113
else:
113-
search_param += ["--k-score", "'seq:96,prof:80'"]
114+
search_param += ["--prefilter-mode", str(prefilter_mode)]
115+
if s is not None: # sensitivy can only be set for non-gpu version, gpu version runs at max sensitivity
116+
search_param += ["-s", "{:.1f}".format(s)]
117+
else:
118+
search_param += ["--k-score", "'seq:96,prof:80'"]
119+
if gpu_server:
120+
search_param += ["--gpu-server", str(gpu_server)]
114121

115122
filter_param = ["--filter-msa", str(filter), "--filter-min-enable", "1000", "--diff", str(diff), "--qid", "0.0,0.2,0.4,0.6,0.8,1.0", "--qsc", "0", "--max-seq-id", "0.95",]
116123
expand_param = ["--expansion-mode", "0", "-e", str(expand_eval), "--expand-filter-clusters", str(filter), "--max-seq-id", "0.95",]
@@ -207,6 +214,8 @@ def mmseqs_search_pair(
207214
prefilter_mode: int = 0,
208215
s: float = 8,
209216
threads: int = 64,
217+
gpu: bool = False,
218+
gpu_server: bool = False,
210219
db_load_mode: int = 2,
211220
pairing_strategy: int = 0,
212221
unpack: bool = True,
@@ -238,11 +247,16 @@ def mmseqs_search_pair(
238247
# fmt: off
239248
# @formatter:off
240249
search_param = ["--num-iterations", "3", "--db-load-mode", str(db_load_mode), "-a", "-e", "0.1", "--max-seqs", "10000",]
241-
search_param += ["--prefilter-mode", str(prefilter_mode)]
242-
if s is not None:
243-
search_param += ["-s", "{:.1f}".format(s)]
250+
if gpu:
251+
search_param += ["--gpu", str(gpu), "--prefilter-mode", "1"] # gpu version only supports ungapped prefilter currently
244252
else:
245-
search_param += ["--k-score", "'seq:96,prof:80'"]
253+
search_param += ["--prefilter-mode", str(prefilter_mode)]
254+
if s is not None: # sensitivy can only be set for non-gpu version, gpu version runs at max sensitivity
255+
search_param += ["-s", "{:.1f}".format(s)]
256+
else:
257+
search_param += ["--k-score", "'seq:96,prof:80'"]
258+
if gpu_server:
259+
search_param += ["--gpu-server", str(gpu_server)]
246260
expand_param = ["--expansion-mode", "0", "-e", "inf", "--expand-filter-clusters", "0", "--max-seq-id", "0.95",]
247261
run_mmseqs(mmseqs, ["search", base.joinpath("qdb"), dbbase.joinpath(db), base.joinpath("res"), base.joinpath("tmp"), "--threads", str(threads),] + search_param,)
248262
run_mmseqs(mmseqs, ["expandaln", base.joinpath("qdb"), dbbase.joinpath(f"{db}{dbSuffix1}"), base.joinpath("res"), dbbase.joinpath(f"{db}{dbSuffix2}"), base.joinpath("res_exp"), "--db-load-mode", str(db_load_mode), "--threads", str(threads),] + expand_param,)
@@ -373,6 +387,12 @@ def main():
373387
parser.add_argument(
374388
"--threads", type=int, default=64, help="Number of threads to use."
375389
)
390+
parser.add_argument(
391+
"--gpu", type=int, default=0, choices=[0, 1], help="Whether to use GPU (1) or not (0). Control number of GPUs with CUDA_VISIBLE_DEVICES env var."
392+
)
393+
parser.add_argument(
394+
"--gpu-server", type=int, default=0, choices=[0, 1], help="Whether to use GPU server (1) or not (0)"
395+
)
376396
args = parser.parse_args()
377397

378398
logging.basicConfig(level = logging.INFO)
@@ -446,6 +466,8 @@ def main():
446466
s=args.s,
447467
db_load_mode=args.db_load_mode,
448468
threads=args.threads,
469+
gpu=args.gpu,
470+
gpu_server=args.gpu_server,
449471
unpack=args.unpack,
450472
)
451473
if is_complex is True:
@@ -458,6 +480,8 @@ def main():
458480
s=args.s,
459481
db_load_mode=args.db_load_mode,
460482
threads=args.threads,
483+
gpu=args.gpu,
484+
gpu_server=args.gpu_server,
461485
pairing_strategy=args.pairing_strategy,
462486
pair_env=False,
463487
unpack=args.unpack,
@@ -473,6 +497,8 @@ def main():
473497
s=args.s,
474498
db_load_mode=args.db_load_mode,
475499
threads=args.threads,
500+
gpu=args.gpu,
501+
gpu_server=args.gpu_server,
476502
pairing_strategy=args.pairing_strategy,
477503
pair_env=True,
478504
unpack=args.unpack,

0 commit comments

Comments
 (0)