Skip to content

Commit 144bb12

Browse files
author
mdiberna
committed
further cleanups, addition of analysis of reduced variants
1 parent 136deef commit 144bb12

File tree

4 files changed

+103
-2
lines changed

4 files changed

+103
-2
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,5 @@ notebooks/**/
1515

1616
# script outputs
1717
scripts/out
18-
scripts/results
18+
scripts/results
19+
scripts/results_reduced

notebooks/bed_cleanup.ipynb

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": 5,
5+
"execution_count": 3,
66
"metadata": {},
77
"outputs": [],
88
"source": [
@@ -12,6 +12,13 @@
1212
")"
1313
]
1414
},
15+
{
16+
"cell_type": "markdown",
17+
"metadata": {},
18+
"source": [
19+
"### Full bed file"
20+
]
21+
},
1522
{
1623
"cell_type": "code",
1724
"execution_count": 6,
@@ -78,6 +85,80 @@
7885
" for gene in gene_list:\n",
7986
" f.write(gene + \"\\n\")"
8087
]
88+
},
89+
{
90+
"cell_type": "markdown",
91+
"metadata": {},
92+
"source": [
93+
"### Reduced bed file"
94+
]
95+
},
96+
{
97+
"cell_type": "code",
98+
"execution_count": 4,
99+
"metadata": {},
100+
"outputs": [],
101+
"source": [
102+
"input_bed = \"../data/ribosome_profiling/selected_truncations_JL.bed\"\n",
103+
"output_bed = \"../data/ribosome_profiling/selected_truncations_JL_cleaned.bed\""
104+
]
105+
},
106+
{
107+
"cell_type": "code",
108+
"execution_count": 5,
109+
"metadata": {},
110+
"outputs": [
111+
{
112+
"name": "stdout",
113+
"output_type": "stream",
114+
"text": [
115+
"Fetching Ensembl reference data...\n",
116+
"Retrieved 48379 mappings\n",
117+
"\n",
118+
"Cleanup Summary:\n",
119+
" Total entries: 28\n",
120+
" Invalid entries removed: 0\n",
121+
" Duplicates removed: 0\n",
122+
" Gene names updated: 2\n",
123+
" Valid entries in final file: 28\n"
124+
]
125+
},
126+
{
127+
"data": {
128+
"text/plain": [
129+
"{'total': 28,\n",
130+
" 'invalid_format': 0,\n",
131+
" 'invalid_ensembl': 0,\n",
132+
" 'duplicates': 0,\n",
133+
" 'updated': 2,\n",
134+
" 'valid': 28}"
135+
]
136+
},
137+
"execution_count": 5,
138+
"metadata": {},
139+
"output_type": "execute_result"
140+
}
141+
],
142+
"source": [
143+
"cleanup_bed(input_bed, output_bed, verbose=True)"
144+
]
145+
},
146+
{
147+
"cell_type": "code",
148+
"execution_count": 6,
149+
"metadata": {},
150+
"outputs": [],
151+
"source": [
152+
"alt_isoforms = AlternativeIsoform()\n",
153+
"alt_isoforms.load_bed(\n",
154+
" \"../data/ribosome_profiling/selected_truncations_JL_cleaned.bed\"\n",
155+
")\n",
156+
"gene_list = alt_isoforms.get_gene_list()\n",
157+
"\n",
158+
"with open(\"../data/ribosome_profiling/gene_list_reduced.txt\", \"w\") as f:\n",
159+
" for gene in gene_list:\n",
160+
" f.write(gene + \"\\n\")"
161+
]
81162
}
82163
],
83164
"metadata": {
File renamed without changes.
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#!/bin/bash
2+
3+
#SBATCH --job-name=truncations # Job name
4+
#SBATCH --partition=20 # Partition name
5+
#SBATCH --ntasks=1 # Run a single task
6+
#SBATCH --cpus-per-task=12 # Single CPU for the controller job
7+
#SBATCH --mem=12G # Memory for the controller job
8+
#SBATCH --time=24:00:00 # Time limit (hrs:min:sec)
9+
#SBATCH --output=out/truncations-%j.out # Standard output log
10+
11+
# Activate conda environment (adjust path as needed)
12+
source ~/.bashrc
13+
conda activate swissisoform
14+
15+
# Run the truncations script
16+
mkdir -p results_reduced
17+
18+
# Run the analysis script
19+
python3 analyze_truncations.py '../data/ribosome_profiling/gene_list_reduced.txt' 'results_reduced'

0 commit comments

Comments
 (0)