diff --git a/repo_utils/answer_key/bench/bench_bnd/fp.vcf.gz b/repo_utils/answer_key/bench/bench_bnd/fp.vcf.gz index d39ae6b..6044c07 100644 Binary files a/repo_utils/answer_key/bench/bench_bnd/fp.vcf.gz and b/repo_utils/answer_key/bench/bench_bnd/fp.vcf.gz differ diff --git a/repo_utils/answer_key/bench/bench_bnd/fp.vcf.gz.tbi b/repo_utils/answer_key/bench/bench_bnd/fp.vcf.gz.tbi index 25dbc49..94d0585 100644 Binary files a/repo_utils/answer_key/bench/bench_bnd/fp.vcf.gz.tbi and b/repo_utils/answer_key/bench/bench_bnd/fp.vcf.gz.tbi differ diff --git a/repo_utils/answer_key/bench/bench_bnd/log.txt b/repo_utils/answer_key/bench/bench_bnd/log.txt index 1057c6e..181089b 100644 --- a/repo_utils/answer_key/bench/bench_bnd/log.txt +++ b/repo_utils/answer_key/bench/bench_bnd/log.txt @@ -1,6 +1,6 @@ -2025-01-29 01:41:34,415 [INFO] Truvari v5.0.1.dev0+24859cf.uc -2025-01-29 01:41:34,416 [INFO] Command /data/truvari/__main__.py bench -b repo_utils/test_files/variants/bnd.base.vcf.gz -c repo_utils/test_files/variants/bnd.comp.vcf.gz -p 0 -o test_results/bench_bnd/ --no-decompose -2025-01-29 01:41:34,418 [INFO] Params: +2025-02-16 22:44:26,757 [INFO] Truvari v5.2.1.dev0+ec2fc3c.uc +2025-02-16 22:44:26,758 [INFO] Command /data/truvari/__main__.py bench -b repo_utils/test_files/variants/bnd.base.vcf.gz -c repo_utils/test_files/variants/bnd.comp.vcf.gz -p 0 -o test_results/bench_bnd/ --no-decompose +2025-02-16 22:44:26,759 [INFO] Params: { "base": "/data/repo_utils/test_files/variants/bnd.base.vcf.gz", "comp": "/data/repo_utils/test_files/variants/bnd.comp.vcf.gz", @@ -36,19 +36,19 @@ "skip_gt": false, "max_resolve": 25000 } -2025-01-29 01:41:34,484 [WARNING] 193 contigs present in comparison VCF header are not in baseline VCF. -2025-01-29 01:41:34,766 [INFO] Zipped 439 variants Counter({'comp': 243, 'base': 196}) -2025-01-29 01:41:34,767 [INFO] 226 chunks of 439 variants Counter({'comp': 243, 'base': 195, '__filtered': 1}) -2025-01-29 01:41:34,907 [INFO] Stats: { +2025-02-16 22:44:26,818 [WARNING] 193 contigs present in comparison VCF header are not in baseline VCF. +2025-02-16 22:44:27,079 [INFO] Zipped 439 variants Counter({'comp': 243, 'base': 196}) +2025-02-16 22:44:27,080 [INFO] 226 chunks of 439 variants Counter({'comp': 243, 'base': 195, '__filtered': 1}) +2025-02-16 22:44:27,201 [INFO] Stats: { "TP-base": 120, "TP-comp": 120, - "FP": 102, + "FP": 123, "FN": 75, - "precision": 0.5405405405405406, + "precision": 0.49382716049382713, "recall": 0.6153846153846154, - "f1": 0.5755395683453237, + "f1": 0.547945205479452, "base cnt": 195, - "comp cnt": 222, + "comp cnt": 243, "TP-comp_TP-gt": 100, "TP-comp_FP-gt": 20, "TP-base_TP-gt": 100, @@ -68,4 +68,4 @@ } } } -2025-01-29 01:41:34,908 [INFO] Finished bench +2025-02-16 22:44:27,202 [INFO] Finished bench diff --git a/repo_utils/answer_key/bench/bench_bnd/summary.json b/repo_utils/answer_key/bench/bench_bnd/summary.json index c7d5c2e..0cccce7 100644 --- a/repo_utils/answer_key/bench/bench_bnd/summary.json +++ b/repo_utils/answer_key/bench/bench_bnd/summary.json @@ -1,13 +1,13 @@ { "TP-base": 120, "TP-comp": 120, - "FP": 102, + "FP": 123, "FN": 75, - "precision": 0.5405405405405406, + "precision": 0.49382716049382713, "recall": 0.6153846153846154, - "f1": 0.5755395683453237, + "f1": 0.547945205479452, "base cnt": 195, - "comp cnt": 222, + "comp cnt": 243, "TP-comp_TP-gt": 100, "TP-comp_FP-gt": 20, "TP-base_TP-gt": 100, diff --git a/repo_utils/answer_key/bench/bench_bnd_decomp/fp.vcf.gz b/repo_utils/answer_key/bench/bench_bnd_decomp/fp.vcf.gz index 3852c75..6888ef2 100644 Binary files a/repo_utils/answer_key/bench/bench_bnd_decomp/fp.vcf.gz and b/repo_utils/answer_key/bench/bench_bnd_decomp/fp.vcf.gz differ diff --git a/repo_utils/answer_key/bench/bench_bnd_decomp/fp.vcf.gz.tbi b/repo_utils/answer_key/bench/bench_bnd_decomp/fp.vcf.gz.tbi index 68da2bc..25bd9cf 100644 Binary files a/repo_utils/answer_key/bench/bench_bnd_decomp/fp.vcf.gz.tbi and b/repo_utils/answer_key/bench/bench_bnd_decomp/fp.vcf.gz.tbi differ diff --git a/repo_utils/answer_key/bench/bench_bnd_decomp/log.txt b/repo_utils/answer_key/bench/bench_bnd_decomp/log.txt index e6edba6..7f76a3c 100644 --- a/repo_utils/answer_key/bench/bench_bnd_decomp/log.txt +++ b/repo_utils/answer_key/bench/bench_bnd_decomp/log.txt @@ -1,6 +1,6 @@ -2025-01-30 22:47:16,962 [INFO] Truvari v5.0.1.dev0+664fd5b -2025-01-30 22:47:16,963 [INFO] Command /data/truvari/__main__.py bench -b repo_utils/test_files/variants/bnd.base.vcf.gz -c repo_utils/test_files/variants/bnd.comp2.vcf.gz --sizemax 1000000000 -p 0 --pick multi -o test_results/bench_bnd_decomp/ -2025-01-30 22:47:16,964 [INFO] Params: +2025-02-16 22:44:28,882 [INFO] Truvari v5.2.1.dev0+ec2fc3c.uc +2025-02-16 22:44:28,883 [INFO] Command /data/truvari/__main__.py bench -b repo_utils/test_files/variants/bnd.base.vcf.gz -c repo_utils/test_files/variants/bnd.comp2.vcf.gz --sizemax 1000000000 -p 0 --pick multi -o test_results/bench_bnd_decomp/ +2025-02-16 22:44:28,884 [INFO] Params: { "base": "/data/repo_utils/test_files/variants/bnd.base.vcf.gz", "comp": "/data/repo_utils/test_files/variants/bnd.comp2.vcf.gz", @@ -36,19 +36,19 @@ "skip_gt": false, "max_resolve": 25000 } -2025-01-30 22:47:17,047 [WARNING] 193 contigs present in comparison VCF header are not in baseline VCF. -2025-01-30 22:47:38,251 [INFO] Zipped 30098 variants Counter({'comp': 29902, 'base': 196}) -2025-01-30 22:47:38,252 [INFO] 20660 chunks of 30098 variants Counter({'comp': 29902, 'base': 195, '__filtered': 1}) -2025-01-30 22:47:42,922 [INFO] Stats: { +2025-02-16 22:44:28,971 [WARNING] 193 contigs present in comparison VCF header are not in baseline VCF. +2025-02-16 22:44:50,577 [INFO] Zipped 30098 variants Counter({'comp': 29902, 'base': 196}) +2025-02-16 22:44:50,577 [INFO] 20660 chunks of 30098 variants Counter({'comp': 29902, 'base': 195, '__filtered': 1}) +2025-02-16 22:44:55,998 [INFO] Stats: { "TP-base": 152, "TP-comp": 131, - "FP": 28401, + "FP": 29558, "FN": 43, - "precision": 0.004591336043740362, + "precision": 0.004412408636195224, "recall": 0.7794871794871795, - "f1": 0.009128901026932595, + "f1": 0.008775144201329448, "base cnt": 195, - "comp cnt": 28532, + "comp cnt": 29689, "TP-comp_TP-gt": 1, "TP-comp_FP-gt": 130, "TP-base_TP-gt": 1, @@ -79,4 +79,4 @@ } } } -2025-01-30 22:47:42,923 [INFO] Finished bench +2025-02-16 22:44:55,999 [INFO] Finished bench diff --git a/repo_utils/answer_key/bench/bench_bnd_decomp/summary.json b/repo_utils/answer_key/bench/bench_bnd_decomp/summary.json index 913015e..f7860cc 100644 --- a/repo_utils/answer_key/bench/bench_bnd_decomp/summary.json +++ b/repo_utils/answer_key/bench/bench_bnd_decomp/summary.json @@ -1,13 +1,13 @@ { "TP-base": 152, "TP-comp": 131, - "FP": 28401, + "FP": 29558, "FN": 43, - "precision": 0.004591336043740362, + "precision": 0.004412408636195224, "recall": 0.7794871794871795, - "f1": 0.009128901026932595, + "f1": 0.008775144201329448, "base cnt": 195, - "comp cnt": 28532, + "comp cnt": 29689, "TP-comp_TP-gt": 1, "TP-comp_FP-gt": 130, "TP-base_TP-gt": 1, diff --git a/truvari/bench.py b/truvari/bench.py index 1dc36ac..a89b353 100644 --- a/truvari/bench.py +++ b/truvari/bench.py @@ -385,7 +385,7 @@ def write_match(self, match): box["TP-comp_TP-gt"] += 1 else: box["TP-comp_FP-gt"] += 1 - elif match.comp.var_size() >= self.m_params.sizemin: + elif match.comp.var_size() >= self.m_params.sizemin or match.comp.is_bnd(): # The if is because we don't count FPs between sizefilt-sizemin box["comp cnt"] += 1 box["FP"] += 1 diff --git a/truvari/matching.py b/truvari/matching.py index 4f5afe0..d8c3b98 100644 --- a/truvari/matching.py +++ b/truvari/matching.py @@ -166,7 +166,6 @@ def chunker(params, *files): cur_end = 0 cur_chunk = defaultdict(list) reference = pysam.FastaFile(params.reference) if params.reference is not None else None - for key, entry in file_zipper(*files): if entry.filter_call(key == 'base'): cur_chunk['__filtered'].append(entry)