Skip to content

Commit a0c466a

Browse files
committed
Set up and use phase layers to represent unphased alleles and pooled design parts
1 parent 927a789 commit a0c466a

File tree

5 files changed

+242
-6
lines changed

5 files changed

+242
-6
lines changed

migrations/core/01-initial/up.sql

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,14 +129,25 @@ CREATE TABLE path_edges (
129129
) STRICT;
130130
CREATE UNIQUE INDEX path_edges_uidx ON path_edges(path_id, edge_id, index_in_path);
131131

132+
-- A phase layer is identified by its id and carries the chromosome index it
-- belongs to plus a flag marking it as a reference layer.
CREATE TABLE phase_layers (
  id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
  chromosome_index INTEGER NOT NULL,
  is_reference INTEGER NOT NULL DEFAULT 0
) STRICT;

-- Partial unique index: only rows with is_reference = 1 are indexed, so at most
-- one reference layer can exist per chromosome_index, while any number of
-- non-reference layers may share the same chromosome_index.
CREATE UNIQUE INDEX phase_layer_uidx
  ON phase_layers(chromosome_index, is_reference)
  WHERE is_reference = 1;
138+
132139
CREATE TABLE block_group_edges (
133140
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
134141
block_group_id INTEGER NOT NULL,
135142
edge_id INTEGER NOT NULL,
136143
chromosome_index INTEGER,
137144
phased INTEGER NOT NULL,
145+
source_phase_layer_id INTEGER,
146+
target_phase_layer_id INTEGER,
138147
FOREIGN KEY(block_group_id) REFERENCES block_groups(id),
139-
FOREIGN KEY(edge_id) REFERENCES edges(id)
148+
FOREIGN KEY(edge_id) REFERENCES edges(id),
149+
FOREIGN KEY(source_phase_layer_id) REFERENCES phase_layers(id),
150+
FOREIGN KEY(target_phase_layer_id) REFERENCES phase_layers(id)
140151
) STRICT;
141152
CREATE UNIQUE INDEX block_group_edges_uidx ON block_group_edges(block_group_id, edge_id, chromosome_index, phased);
142153

src/models.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,12 @@ pub mod collection;
55
pub mod edge;
66
pub mod file_types;
77
pub mod metadata;
8+
pub mod new_block_group_edge;
89
pub mod node;
910
pub mod operations;
1011
pub mod path;
1112
pub mod path_edge;
13+
pub mod phase_layer;
1214
pub mod sample;
1315
pub mod sequence;
1416
pub mod strand;

src/models/new_block_group_edge.rs

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
use crate::models::edge::{Edge, EdgeData};
2+
use crate::models::traits::*;
3+
use rusqlite;
4+
use rusqlite::types::Value;
5+
use rusqlite::{Connection, Row};
6+
use std::collections::HashMap;
7+
8+
/// One row of the `block_group_edges` table: the membership of an edge in a
/// block group together with its phasing annotations.
///
/// Derives `Eq`, `Hash` and `PartialEq` like the other structs in this module
/// so rows can be compared and stored in hashed collections.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct NewBlockGroupEdge {
    /// Primary key of the row.
    pub id: i64,
    /// Id of the block group the edge belongs to.
    pub block_group_id: i64,
    /// Id of the edge.
    pub edge_id: i64,
    /// Value of the `chromosome_index` column.
    pub chromosome_index: i64,
    /// Value of the `phased` column (stored as an integer).
    pub phased: i64,
    /// Id of the phase layer on the source side of the edge.
    pub source_phase_layer_id: i64,
    /// Id of the phase layer on the target side of the edge.
    pub target_phase_layer_id: i64,
}
18+
19+
/// Insertable form of a `block_group_edges` row: every column except the
/// auto-generated `id`. This is the input type of
/// `NewBlockGroupEdge::bulk_create`.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct NewBlockGroupEdgeData {
    /// Id of the block group the edge is attached to.
    pub block_group_id: i64,
    /// Id of the edge being attached.
    pub edge_id: i64,
    /// Value for the `chromosome_index` column.
    pub chromosome_index: i64,
    /// Value for the `phased` column (stored as an integer).
    pub phased: i64,
    /// Id of the phase layer on the source side of the edge.
    pub source_phase_layer_id: i64,
    /// Id of the phase layer on the target side of the edge.
    pub target_phase_layer_id: i64,
}
28+
29+
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
30+
pub struct NewAugmentedEdge {
31+
pub edge: Edge,
32+
pub chromosome_index: i64,
33+
pub phased: i64,
34+
pub source_phase_layer_id: i64,
35+
pub target_phase_layer_id: i64,
36+
}
37+
38+
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
39+
pub struct NewAugmentedEdgeData {
40+
pub edge_data: EdgeData,
41+
pub chromosome_index: i64,
42+
pub phased: i64,
43+
pub source_phase_layer_id: i64,
44+
pub target_phase_layer_id: i64,
45+
}
46+
47+
impl Query for NewBlockGroupEdge {
48+
type Model = NewBlockGroupEdge;
49+
fn process_row(row: &Row) -> Self::Model {
50+
NewBlockGroupEdge {
51+
id: row.get(0).unwrap(),
52+
block_group_id: row.get(1).unwrap(),
53+
edge_id: row.get(2).unwrap(),
54+
chromosome_index: row.get(3).unwrap(),
55+
phased: row.get(4).unwrap(),
56+
source_phase_layer_id: row.get(5).unwrap(),
57+
target_phase_layer_id: row.get(6).unwrap(),
58+
}
59+
}
60+
}
61+
62+
impl NewBlockGroupEdge {
63+
pub fn bulk_create(conn: &Connection, block_group_edges: &[NewBlockGroupEdgeData]) {
64+
for chunk in block_group_edges.chunks(100000) {
65+
let mut rows_to_insert = vec![];
66+
for block_group_edge in chunk {
67+
let row = format!(
68+
"({0}, {1}, {2}, {3}, {4}, {5})",
69+
block_group_edge.block_group_id,
70+
block_group_edge.edge_id,
71+
block_group_edge.chromosome_index,
72+
block_group_edge.phased,
73+
block_group_edge.source_phase_layer_id,
74+
block_group_edge.target_phase_layer_id,
75+
);
76+
rows_to_insert.push(row);
77+
}
78+
79+
let formatted_rows_to_insert = rows_to_insert.join(", ");
80+
81+
let insert_statement = format!(
82+
"INSERT OR IGNORE INTO block_group_edges (block_group_id, edge_id, chromosome_index, phased, source_phase_layer_id, target_phase_layer_id) VALUES {0};",
83+
formatted_rows_to_insert
84+
);
85+
let _ = conn.execute(&insert_statement, ());
86+
}
87+
}
88+
89+
pub fn edges_for_block_group(conn: &Connection, block_group_id: i64) -> Vec<NewAugmentedEdge> {
90+
let block_group_edges = NewBlockGroupEdge::query(
91+
conn,
92+
"select * from block_group_edges where block_group_id = ?1;",
93+
rusqlite::params!(Value::from(block_group_id)),
94+
);
95+
let edge_ids = block_group_edges
96+
.clone()
97+
.into_iter()
98+
.map(|block_group_edge| block_group_edge.edge_id)
99+
.collect::<Vec<i64>>();
100+
let chromosome_index_by_edge_id = block_group_edges
101+
.clone()
102+
.into_iter()
103+
.map(|block_group_edge| (block_group_edge.edge_id, block_group_edge.chromosome_index))
104+
.collect::<HashMap<i64, i64>>();
105+
let phased_by_edge_id = block_group_edges
106+
.clone()
107+
.into_iter()
108+
.map(|block_group_edge| (block_group_edge.edge_id, block_group_edge.phased))
109+
.collect::<HashMap<i64, i64>>();
110+
let source_phase_layer_id_by_edge_id = block_group_edges
111+
.clone()
112+
.into_iter()
113+
.map(|block_group_edge| {
114+
(
115+
block_group_edge.edge_id,
116+
block_group_edge.source_phase_layer_id,
117+
)
118+
})
119+
.collect::<HashMap<i64, i64>>();
120+
let target_phase_layer_id_by_edge_id = block_group_edges
121+
.clone()
122+
.into_iter()
123+
.map(|block_group_edge| {
124+
(
125+
block_group_edge.edge_id,
126+
block_group_edge.target_phase_layer_id,
127+
)
128+
})
129+
.collect::<HashMap<i64, i64>>();
130+
let edges = Edge::bulk_load(conn, &edge_ids);
131+
edges
132+
.into_iter()
133+
.map(|edge| NewAugmentedEdge {
134+
edge: edge.clone(),
135+
chromosome_index: *chromosome_index_by_edge_id.get(&edge.id).unwrap(),
136+
phased: *phased_by_edge_id.get(&edge.id).unwrap(),
137+
source_phase_layer_id: *source_phase_layer_id_by_edge_id.get(&edge.id).unwrap(),
138+
target_phase_layer_id: *target_phase_layer_id_by_edge_id.get(&edge.id).unwrap(),
139+
})
140+
.collect()
141+
}
142+
}

src/models/phase_layer.rs

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
use rusqlite::Connection;
2+
3+
/// One row of the `phase_layers` table.
///
/// Derives `Eq`, `Hash` and `PartialEq` in addition to `Clone` and `Debug` so
/// layers can be compared and stored in hashed collections.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct PhaseLayer {
    /// Primary key of the row.
    pub id: i64,
    /// Chromosome index this layer belongs to.
    pub chromosome_index: i64,
    /// Integer flag from the `is_reference` column; `1` marks a reference
    /// layer (the value the table's partial unique index filters on).
    pub is_reference: i64,
}
9+
10+
/// `chromosome_index` value used when creating phase layers for unphased
/// content (passed to `PhaseLayer::create` by the library update code).
pub const UNPHASED_CHROMOSOME_INDEX: i64 = -1;
11+
12+
impl PhaseLayer {
13+
pub fn get_or_create(
14+
conn: &Connection,
15+
chromosome_index: i64,
16+
is_reference: i64,
17+
) -> Result<i64, &'static str> {
18+
let phase_layer_id: i64 = match conn.query_row(
19+
"select id from phase_layers where chromosome_index = ?1 AND is_reference = ?2",
20+
(chromosome_index, is_reference),
21+
|row| row.get(0),
22+
) {
23+
Ok(res) => res,
24+
Err(rusqlite::Error::QueryReturnedNoRows) => 0,
25+
Err(_e) => {
26+
panic!("Error querying the database: {_e}");
27+
}
28+
};
29+
if phase_layer_id != 0 {
30+
return Ok(phase_layer_id);
31+
}
32+
33+
let new_phase_layer_id = PhaseLayer::create(conn, chromosome_index, is_reference);
34+
35+
Ok(new_phase_layer_id)
36+
}
37+
38+
pub fn create(conn: &Connection, chromosome_index: i64, is_reference: i64) -> i64 {
39+
let query = "INSERT INTO phase_layers (chromosome_index, is_reference) VALUES (?1, ?2) RETURNING (id)";
40+
let mut stmt = conn.prepare(query).unwrap();
41+
match stmt.query_row((chromosome_index, is_reference), |row| row.get(0)) {
42+
Ok(res) => res,
43+
Err(rusqlite::Error::SqliteFailure(err, details)) => {
44+
if err.code == rusqlite::ErrorCode::ConstraintViolation {
45+
println!("{err:?} {details:?}");
46+
conn
47+
.query_row(
48+
"select id from phase_layers where chromosome_index = ?1 and is_reference = ?2",
49+
(chromosome_index, is_reference),
50+
|row| row.get(0),
51+
)
52+
.unwrap()
53+
} else {
54+
panic!("something bad happened querying the database")
55+
}
56+
}
57+
Err(_) => {
58+
panic!("something bad happened querying the database")
59+
}
60+
}
61+
}
62+
}

src/updates/library.rs

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,12 @@ use std::io::BufReader;
88
use std::str;
99

1010
use crate::models::block_group::BlockGroup;
11-
use crate::models::block_group_edge::{BlockGroupEdge, BlockGroupEdgeData};
1211
use crate::models::edge::{Edge, EdgeData};
1312
use crate::models::file_types::FileTypes;
13+
use crate::models::new_block_group_edge::{NewBlockGroupEdge, NewBlockGroupEdgeData};
1414
use crate::models::node::Node;
1515
use crate::models::operations::OperationInfo;
16+
use crate::models::phase_layer::{PhaseLayer, UNPHASED_CHROMOSOME_INDEX};
1617
use crate::models::sample::Sample;
1718
use crate::models::sequence::Sequence;
1819
use crate::models::strand::Strand;
@@ -130,6 +131,21 @@ pub fn update_with_library(
130131
let node_end_coordinate = end_coordinate - end_block.start + end_block.sequence_start;
131132

132133
let mut new_edges = HashSet::new();
134+
let mut phase_layers_by_node_id = HashMap::new();
135+
let unphased_layer_id1 = PhaseLayer::create(conn, UNPHASED_CHROMOSOME_INDEX, 0);
136+
let unphased_layer_id2 = PhaseLayer::create(conn, UNPHASED_CHROMOSOME_INDEX, 0);
137+
// TODO: Change path to use block group edges instead of edges,
138+
// then use the phase layer of the start and end nodes here instead of the unphased one
139+
phase_layers_by_node_id.insert(start_block.node_id, unphased_layer_id1);
140+
phase_layers_by_node_id.insert(end_block.node_id, unphased_layer_id2);
141+
142+
for parts in parts_list.iter() {
143+
for part in parts.iter() {
144+
let phased_layer_id = PhaseLayer::create(conn, UNPHASED_CHROMOSOME_INDEX, 0);
145+
phase_layers_by_node_id.insert(**part, phased_layer_id);
146+
}
147+
}
148+
133149
let start_parts = parts_list.first().unwrap();
134150
for start_part in *start_parts {
135151
let edge = EdgeData {
@@ -179,16 +195,19 @@ pub fn update_with_library(
179195
path_changes_count *= end_parts.len();
180196

181197
let new_edge_ids = Edge::bulk_create(conn, &new_edges.iter().cloned().collect());
182-
let new_block_group_edges = new_edge_ids
198+
let new_edges = Edge::bulk_load(conn, &new_edge_ids);
199+
let new_block_group_edges = new_edges
183200
.iter()
184-
.map(|edge_id| BlockGroupEdgeData {
201+
.map(|edge| NewBlockGroupEdgeData {
185202
block_group_id: path.block_group_id,
186-
edge_id: *edge_id,
203+
edge_id: edge.id,
187204
chromosome_index: 0,
188205
phased: 0,
206+
source_phase_layer_id: *phase_layers_by_node_id.get(&edge.source_node_id).unwrap(),
207+
target_phase_layer_id: *phase_layers_by_node_id.get(&edge.target_node_id).unwrap(),
189208
})
190209
.collect::<Vec<_>>();
191-
BlockGroupEdge::bulk_create(conn, &new_block_group_edges);
210+
NewBlockGroupEdge::bulk_create(conn, &new_block_group_edges);
192211

193212
let summary_str = format!("{region_name}: {path_changes_count} changes.\n");
194213
operation_management::end_operation(

0 commit comments

Comments
 (0)