diff --git a/modules/local/ancestry/relabel_scorefiles.nf b/modules/local/ancestry/relabel_scorefiles.nf index dccbb632..e186c4bb 100644 --- a/modules/local/ancestry/relabel_scorefiles.nf +++ b/modules/local/ancestry/relabel_scorefiles.nf @@ -3,7 +3,7 @@ process RELABEL_SCOREFILES { label 'process_medium' label 'pgscatalog_utils' // controls conda, docker, + singularity options - tag "$meta.id $meta.effect_type $target_format" + tag "reference $meta.effect_type $target_format" conda "${task.ext.conda}" @@ -16,13 +16,11 @@ process RELABEL_SCOREFILES { tuple val(meta), path(target), path(matched) output: - tuple val(relabel_meta), path("${output}"), emit: relabelled + tuple val(relabel_meta), path("reference*"), emit: relabelled path "versions.yml", emit: versions script: target_format = target.getName().tokenize('.')[1] // test.tar.gz -> tar, test.var -> var relabel_meta = meta.plus(['target_format': target_format]) // .plus() returns a new map - output_mode = "--split --combined" // always output split and combined data to make life easier col_from = "ID_TARGET" col_to = "ID_REF" - output = "${meta.id}.${target_format}*" @@ -32,11 +30,18 @@ process RELABEL_SCOREFILES { --col_to $col_to \ --target_file $target \ --target_col ID \ - --dataset ${meta.id}.${target_format} \ + --dataset reference \ --verbose \ - $output_mode \ + --combined \ --outdir \$PWD + # TODO: improve pgscatalog-relabel so you can set output names precisely + # use some unpleasant sed to keep a consistent naming scheme + # hgdp_ALL_additive_0.scorefile.gz -> reference_ALL_additive_0.scorefile.gz + output=\$(echo "$target" | sed 's/^[^_]*_/reference_/') + + mv reference_ALL_relabelled.gz "\$output" + cat <<-END_VERSIONS > versions.yml ${task.process.tokenize(':').last()}: pgscatalog.core: \$(echo \$(python -c 'import pgscatalog.core; print(pgscatalog.core.__version__)')) diff --git a/subworkflows/local/apply_score.nf b/subworkflows/local/apply_score.nf index defb20dd..f91a6c58 100644 --- 
a/subworkflows/local/apply_score.nf +++ b/subworkflows/local/apply_score.nf @@ -125,7 +125,7 @@ workflow APPLY_SCORE { // pgscatalog-aggregate --verify_variants notes: // Checks that variant IDs in the scorefiles match the IDs of scored variants perfectly - // Just dump all of the supporting files into the same directory: don't do any fancy joins + // Just dump all of the supporting files into the same directory: don't do any fancy channel manipulation PLINK2_SCORE.out.vars_scored .collect() .set { ch_vars_scored } @@ -133,10 +133,22 @@ workflow APPLY_SCORE { ch_target_scorefile.flatMap { it.last() } .filter(Path) .collect() - .set{ ch_scorefile_verify } + .set{ ch_target_scorefile_flat } - SCORE_AGGREGATE ( ch_scores, ch_vars_scored, ch_scorefile_verify ) + // Example: for a calculated score file named + // reference_ALL_additive_0.sscore.zst (ch_scores) + // --verify_variants expects these companion files in the same directory: + // reference_ALL_additive_0.sscore.vars (ch_vars_scored) + // reference_ALL_additive_0.scorefile.gz (ch_verify_scorefiles) + + ch_apply_ref.flatMap { it.last() } + .filter(Path) + .mix( ch_target_scorefile_flat ) + .collect() + .set{ ch_verify_scorefiles } + SCORE_AGGREGATE ( ch_scores, ch_vars_scored, ch_verify_scorefiles ) + ch_versions = ch_versions.mix(SCORE_AGGREGATE.out.versions) emit: