Skip to content

Commit

Permalink
fix --run_ancestry variant verify
Browse files Browse the repository at this point in the history
  • Loading branch information
nebfield committed Oct 10, 2024
1 parent 676030c commit 66373b1
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 8 deletions.
16 changes: 11 additions & 5 deletions modules/local/ancestry/relabel_scorefiles.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ process RELABEL_SCOREFILES {
label 'process_medium'
label 'pgscatalog_utils' // controls conda, docker, + singularity options

tag "$meta.id $meta.effect_type $target_format"
tag "reference $meta.effect_type $target_format"

conda "${task.ext.conda}"

Expand All @@ -16,13 +16,12 @@ process RELABEL_SCOREFILES {
tuple val(meta), path(target), path(matched)

output:
tuple val(relabel_meta), path("${output}"), emit: relabelled
tuple val(relabel_meta), path("reference*"), emit: relabelled
path "versions.yml", emit: versions

script:
target_format = target.getName().tokenize('.')[1] // test.tar.gz -> tar, test.var -> var
relabel_meta = meta.plus(['target_format': target_format]) // .plus() returns a new map
output_mode = "--split --combined" // always output split and combined data to make life easier
col_from = "ID_TARGET"
col_to = "ID_REF"
output = "${meta.id}.${target_format}*"
Expand All @@ -32,11 +31,18 @@ process RELABEL_SCOREFILES {
--col_to $col_to \
--target_file $target \
--target_col ID \
--dataset ${meta.id}.${target_format} \
--dataset reference \
--verbose \
$output_mode \
--combined \
--outdir \$PWD
# TODO: improve pgscatalog-relabel so you can set output names precisely
# use some unpleasant sed to keep a consistent naming scheme
# hgdp_ALL_additive_0.scorefile.gz -> reference_ALL_additive_0.scorefile.gz
output=\$(echo $target | sed 's/^[^_]*_/reference_/')
mv reference_ALL_relabelled.gz \$output
cat <<-END_VERSIONS > versions.yml
${task.process.tokenize(':').last()}:
pgscatalog.core: \$(echo \$(python -c 'import pgscatalog.core; print(pgscatalog.core.__version__)'))
Expand Down
18 changes: 15 additions & 3 deletions subworkflows/local/apply_score.nf
Original file line number Diff line number Diff line change
Expand Up @@ -125,18 +125,30 @@ workflow APPLY_SCORE {

// pgscatalog-aggregate --verify_variants notes:
// Checks that variant IDs in the scorefiles match the IDs of scored variants perfectly
// Just dump all of the supporting files into the same directory: don't do any fancy joins
// Just dump all of the supporting files into the same directory: don't do any fancy channel manipulation
PLINK2_SCORE.out.vars_scored
.collect()
.set { ch_vars_scored }

ch_target_scorefile.flatMap { it.last() }
.filter(Path)
.collect()
.set{ ch_scorefile_verify }
.set{ ch_target_scorefile_flat }

SCORE_AGGREGATE ( ch_scores, ch_vars_scored, ch_scorefile_verify )
// note: for a calculated score file such as
// reference_ALL_additive_0.sscore.zst (ch_scores),
// --verify_variants expects the following files in the same directory:
// reference_ALL_additive_0.sscore.vars (ch_vars_scored)
// reference_ALL_additive_0.scorefile.gz (ch_verify_scorefiles)

ch_apply_ref.flatMap { it.last() }
.filter(Path)
.mix( ch_target_scorefile_flat )
.collect()
.set{ ch_verify_scorefiles }

SCORE_AGGREGATE ( ch_scores, ch_vars_scored, ch_verify_scorefiles )

ch_versions = ch_versions.mix(SCORE_AGGREGATE.out.versions)

emit:
Expand Down

0 comments on commit 66373b1

Please sign in to comment.