Skip to content

Commit 8904af0

Browse files
author
mdiberna
committed
cleaned up mutation colors and plotting
1 parent a8f824d commit 8904af0

File tree

6 files changed

+202
-317
lines changed

6 files changed

+202
-317
lines changed

.gitignore

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,8 @@ data/genome_data/*.txt
1111
data/ribosome_profiling
1212
data/mutation_data
1313

14-
# output folders
15-
notebooks/**/
16-
notebooks/*_test.ipynb
14+
# notebooks (entire directory ignored)
15+
notebooks/
1716

1817
# script outputs
1918
scripts/out

notebooks/bed_cleanup.ipynb

Lines changed: 0 additions & 238 deletions
This file was deleted.

scripts/analyze_truncations.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -390,7 +390,7 @@ async def process_gene(
390390
alt_features=truncation_feature,
391391
mutations_df=pair_mutations,
392392
output_file=str(
393-
transcript_dir / f"{pair_base_filename}_filtered.png"
393+
transcript_dir / f"{pair_base_filename}_filtered.pdf"
394394
),
395395
)
396396

@@ -401,7 +401,7 @@ async def process_gene(
401401
alt_features=truncation_feature,
402402
mutations_df=pair_mutations,
403403
output_file=str(
404-
transcript_dir / f"{pair_base_filename}_filtered_zoom.png"
404+
transcript_dir / f"{pair_base_filename}_filtered_zoom.pdf"
405405
),
406406
padding=100,
407407
)

scripts/analyze_truncations_full.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,15 @@
55
#SBATCH --ntasks=1 # Run a single task
66
#SBATCH --cpus-per-task=12 # CPUs allocated to the controller job
77
#SBATCH --mem=24G # Memory for the controller job
8-
#SBATCH --time=1:00:00 # Time limit (hrs:min:sec)
8+
#SBATCH --time=12:00:00 # Time limit (hrs:min:sec)
99
#SBATCH --output=out/truncations-%j.out # Standard output log
1010

1111
# Activate conda environment (adjust path as needed)
1212
source ~/.bashrc
1313
conda activate swissisoform
1414

1515
# Create output directory
16-
mkdir -p results_reduced
16+
mkdir -p results
1717

1818
# Define paths for command-line arguments
1919
GENE_LIST="../data/ribosome_profiling/gene_list.txt"

scripts/cleanup_files.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#!/usr/bin/env python3
2+
3+
"""Cleanup files for the ribosome profiling data.
4+
5+
This script cleans up the GTF file and BED files for the ribosome profiling data.
6+
It updates the gene names in the GENCODE v25 GTF file to match those in the newer GENCODE v47 reference annotation.
7+
It also cleans up the BED files to update gene names and remove duplicates, and writes the gene list of each cleaned BED file to a text file.
8+
"""
9+
10+
from swissisoform.alternative_isoforms import AlternativeIsoform
11+
from swissisoform.utils import cleanup_bed, update_gencode_gene_names
12+
13+
# GTF: update gene names
14+
input_gtf = "../data/genome_data/gencode.v25.annotation.gtf"
15+
output_gtf = "../data/genome_data/gencode.v25.annotation.ensembl_cleaned.gtf"
16+
reference_gtf = "../data/genome_data/gencode.v47.annotation.gtf"
17+
18+
update_gencode_gene_names(
19+
input_gtf_path=input_gtf,
20+
output_gtf_path=output_gtf,
21+
reference_gtf_path=reference_gtf,
22+
verbose=True,
23+
)
24+
25+
# All truncations: bed cleanup
26+
input_bed = "../data/ribosome_profiling/full_truncations_JL.bed"
27+
output_bed = "../data/ribosome_profiling/full_truncations_JL_cleaned.bed"
28+
29+
cleanup_bed(input_bed, output_bed, gtf_path=output_gtf, verbose=True)
30+
31+
alt_isoforms = AlternativeIsoform()
32+
alt_isoforms.load_bed("../data/ribosome_profiling/full_truncations_JL_cleaned.bed")
33+
gene_list = alt_isoforms.get_gene_list()
34+
35+
with open("../data/ribosome_profiling/gene_list.txt", "w") as f:
36+
for gene in gene_list:
37+
f.write(gene + "\n")
38+
39+
# Selected truncations: bed cleanup
40+
input_bed = "../data/ribosome_profiling/selected_truncations_JL.bed"
41+
output_bed = "../data/ribosome_profiling/selected_truncations_JL_cleaned.bed"
42+
43+
cleanup_bed(input_bed, output_bed, gtf_path=output_gtf, verbose=True)
44+
45+
alt_isoforms = AlternativeIsoform()
46+
alt_isoforms.load_bed("../data/ribosome_profiling/selected_truncations_JL_cleaned.bed")
47+
gene_list = alt_isoforms.get_gene_list()
48+
49+
with open("../data/ribosome_profiling/gene_list_reduced.txt", "w") as f:
50+
for gene in gene_list:
51+
f.write(gene + "\n")

0 commit comments

Comments
 (0)