Skip to content
This repository was archived by the owner on Mar 19, 2021. It is now read-only.

Commit 5083778

Browse files
danieldk (Daniël de Kok)
authored and committed
Remove cycles from the dependency graph
Applies the strategy suggested by Strzyz et al., 2019 to attach the leftmost token in a cycle to the first root token.
1 parent af4ae56 commit 5083778

File tree

5 files changed

+95
-7
lines changed

5 files changed

+95
-7
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

sticker/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ finalfusion = "0.5"
1111
itertools = "0.8"
1212
ndarray = "0.12"
1313
ordered-float = "1"
14+
petgraph = "0.4"
1415
protobuf = "1"
1516
serde = "1"
1617
serde_derive = "1"

sticker/src/depparse/post_processing.rs

Lines changed: 81 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
use conllx::graph::{DepTriple, Sentence};
22
use ordered_float::OrderedFloat;
3+
use petgraph::algo::tarjan_scc;
4+
use petgraph::graph::DiGraph;
35

46
use super::DependencyEncoding;
57
use crate::EncodingProb;
@@ -29,6 +31,50 @@ where
2931
}
3032
}
3133

34+
/// Break cycles in the graph.
35+
///
36+
/// Panics when a token does not have a head. To ensure that each
37+
/// token has a head, apply `attach_orphans` to the dependency graph
38+
/// before this function.
39+
pub fn break_cycles(sent: &mut Sentence, root_idx: usize) {
40+
loop {
41+
let components = {
42+
let digraph: &DiGraph<_, _> = (&*sent).into();
43+
tarjan_scc(digraph)
44+
.into_iter()
45+
.filter(|c| c.len() > 1)
46+
.collect::<Vec<_>>()
47+
};
48+
49+
// We are done if there are no more cycles.
50+
if components.is_empty() {
51+
break;
52+
}
53+
54+
for cycle in components.into_iter() {
55+
// Find the first token in the cycle, exclude the root
56+
// token to avoid self-cycles.
57+
let first_token = cycle
58+
.into_iter()
59+
.filter(|idx| idx.index() != root_idx)
60+
.min()
61+
.expect("Cannot get minimum, but iterator is non-empty")
62+
.index();
63+
64+
// Reattach the token to the root.
65+
let head_rel = sent
66+
.dep_graph()
67+
.head(first_token)
68+
.expect("Token without a head")
69+
.relation()
70+
.map(ToOwned::to_owned);
71+
72+
sent.dep_graph_mut()
73+
.add_deprel(DepTriple::new(root_idx, head_rel, first_token));
74+
}
75+
}
76+
}
77+
3278
/// Find a candidate root token.
3379
///
3480
/// The token with the highest-probability encoding with the
@@ -113,7 +159,7 @@ mod tests {
113159
use conllx::graph::{DepTriple, Sentence};
114160
use conllx::token::TokenBuilder;
115161

116-
use super::{attach_orphans, find_or_create_root, first_root, ROOT_RELATION};
162+
use super::{attach_orphans, break_cycles, find_or_create_root, first_root, ROOT_RELATION};
117163
use crate::depparse::{
118164
pos_position_table, DependencyEncoding, RelativePOS, RelativePOSEncoder,
119165
};
@@ -137,6 +183,24 @@ mod tests {
137183
sent
138184
}
139185

186+
fn test_graph_cycle() -> Sentence {
187+
let mut sent = Sentence::new();
188+
sent.push(TokenBuilder::new("Die").pos("det").into());
189+
sent.push(TokenBuilder::new("AWO").pos("noun").into());
190+
sent.push(TokenBuilder::new("veruntreute").pos("verb").into());
191+
sent.push(TokenBuilder::new("Spendengeld").pos("noun").into());
192+
sent.dep_graph_mut()
193+
.add_deprel(DepTriple::new(2, Some("det"), 1));
194+
sent.dep_graph_mut()
195+
.add_deprel(DepTriple::new(1, Some("subj"), 2));
196+
sent.dep_graph_mut()
197+
.add_deprel(DepTriple::new(0, Some(ROOT_RELATION), 3));
198+
sent.dep_graph_mut()
199+
.add_deprel(DepTriple::new(3, Some("obj"), 4));
200+
201+
sent
202+
}
203+
140204
fn test_graph_no_root() -> Sentence {
141205
let mut sent = Sentence::new();
142206
sent.push(TokenBuilder::new("Die").pos("det").into());
@@ -230,4 +294,20 @@ mod tests {
230294

231295
assert_eq!(sent, test_graph());
232296
}
297+
298+
#[test]
299+
fn break_simple_cycle() {
300+
let mut check = test_graph_cycle();
301+
// Token 1 is the leftmost token in the cycle and
302+
// should be reattached to the root token (token 3).
303+
check
304+
.dep_graph_mut()
305+
.add_deprel(DepTriple::new(3, Some("det"), 1));
306+
307+
// Detect cycle and break it.
308+
let mut sent = test_graph_cycle();
309+
break_cycles(&mut sent, 3);
310+
311+
assert_eq!(sent, check);
312+
}
233313
}

sticker/src/depparse/relative_pos.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@ use conllx::graph::{DepTriple, Node, Sentence};
44
use failure::Error;
55
use serde_derive::{Deserialize, Serialize};
66

7-
use super::{attach_orphans, find_or_create_root, DecodeError, DependencyEncoding, EncodeError};
7+
use super::{
8+
attach_orphans, break_cycles, find_or_create_root, DecodeError, DependencyEncoding, EncodeError,
9+
};
810
use crate::{EncodingProb, SentenceDecoder, SentenceEncoder};
911

1012
/// Relative head position by part-of-speech.
@@ -201,10 +203,11 @@ impl SentenceDecoder for RelativePOSEncoder {
201203
}
202204

203205
// Fixup tree.
204-
let root = find_or_create_root(labels, sentence, |idx, encoding| {
206+
let root_idx = find_or_create_root(labels, sentence, |idx, encoding| {
205207
Self::decode_idx(&pos_table, idx, encoding).ok()
206208
});
207-
attach_orphans(labels, sentence, root);
209+
attach_orphans(labels, sentence, root_idx);
210+
break_cycles(sentence, root_idx);
208211

209212
Ok(())
210213
}

sticker/src/depparse/relative_position.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@ use conllx::graph::{DepTriple, Sentence};
22
use failure::Error;
33
use serde_derive::{Deserialize, Serialize};
44

5-
use super::{attach_orphans, find_or_create_root, DecodeError, DependencyEncoding, EncodeError};
5+
use super::{
6+
attach_orphans, break_cycles, find_or_create_root, DecodeError, DependencyEncoding, EncodeError,
7+
};
68
use crate::{EncodingProb, SentenceDecoder, SentenceEncoder};
79

810
/// Relative head position.
@@ -101,10 +103,11 @@ impl SentenceDecoder for RelativePositionEncoder {
101103

102104
// Fixup tree.
103105
let sentence_len = sentence.len();
104-
let root = find_or_create_root(labels, sentence, |idx, encoding| {
106+
let root_idx = find_or_create_root(labels, sentence, |idx, encoding| {
105107
Self::decode_idx(idx, sentence_len, encoding).ok()
106108
});
107-
attach_orphans(labels, sentence, root);
109+
attach_orphans(labels, sentence, root_idx);
110+
break_cycles(sentence, root_idx);
108111

109112
Ok(())
110113
}

0 commit comments

Comments
 (0)