Skip to content

Commit 8cf26da

Browse files
authored
Add the ability to set a custom highlight prefix and postfix for snippets (quickwit-oss#1422)
* add possibility to change highlight prefix and postfix
* add comment to Snippet::new
* add test for highlighted elements
* add default highlight prefix and postfix constants
* fix spelling
* fix tests
* fix spelling
* do fixes after code review
* reduce test_snippet_generator_custom_highlighted_elements code
* fix fmt
* change names to more convenient ones

---------

Co-authored-by: Sergei Lavrentev <[email protected]>
1 parent a3f0013 commit 8cf26da

File tree

1 file changed

+48
-16
lines changed

1 file changed

+48
-16
lines changed

src/snippet/mod.rs

Lines changed: 48 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,9 @@ use crate::{Document, Score, Searcher, Term};
1111

1212
const DEFAULT_MAX_NUM_CHARS: usize = 150;
1313

14+
const DEFAULT_SNIPPET_PREFIX: &str = "<b>";
15+
const DEFAULT_SNIPPET_POSTFIX: &str = "</b>";
16+
1417
#[derive(Debug)]
1518
pub struct FragmentCandidate {
1619
score: Score,
@@ -55,17 +58,28 @@ impl FragmentCandidate {
5558
pub struct Snippet {
5659
fragment: String,
5760
highlighted: Vec<Range<usize>>,
61+
snippet_prefix: String,
62+
snippet_postfix: String,
5863
}
5964

60-
const HIGHLIGHTEN_PREFIX: &str = "<b>";
61-
const HIGHLIGHTEN_POSTFIX: &str = "</b>";
62-
6365
impl Snippet {
64-
/// Create a new, empty, `Snippet`
66+
/// Create a new `Snippet`.
67+
fn new(fragment: &str, highlighted: Vec<Range<usize>>) -> Self {
68+
Self {
69+
fragment: fragment.to_string(),
70+
highlighted,
71+
snippet_prefix: DEFAULT_SNIPPET_PREFIX.to_string(),
72+
snippet_postfix: DEFAULT_SNIPPET_POSTFIX.to_string(),
73+
}
74+
}
75+
76+
/// Create a new, empty, `Snippet`.
6577
pub fn empty() -> Snippet {
6678
Snippet {
6779
fragment: String::new(),
6880
highlighted: Vec::new(),
81+
snippet_prefix: String::new(),
82+
snippet_postfix: String::new(),
6983
}
7084
}
7185

@@ -81,9 +95,9 @@ impl Snippet {
8195

8296
for item in collapse_overlapped_ranges(&self.highlighted) {
8397
html.push_str(&encode_minimal(&self.fragment[start_from..item.start]));
84-
html.push_str(HIGHLIGHTEN_PREFIX);
98+
html.push_str(&self.snippet_prefix);
8599
html.push_str(&encode_minimal(&self.fragment[item.clone()]));
86-
html.push_str(HIGHLIGHTEN_POSTFIX);
100+
html.push_str(&self.snippet_postfix);
87101
start_from = item.end;
88102
}
89103
html.push_str(&encode_minimal(
@@ -101,6 +115,12 @@ impl Snippet {
101115
pub fn highlighted(&self) -> &[Range<usize>] {
102116
&self.highlighted
103117
}
118+
119+
/// Sets highlighted prefix and postfix.
120+
pub fn set_snippet_prefix_postfix(&mut self, prefix: &str, postfix: &str) {
121+
self.snippet_prefix = prefix.to_string();
122+
self.snippet_postfix = postfix.to_string()
123+
}
104124
}
105125

106126
/// Returns a non-empty list of "good" fragments.
@@ -172,17 +192,11 @@ fn select_best_fragment_combination(fragments: &[FragmentCandidate], text: &str)
172192
.iter()
173193
.map(|item| item.start - fragment.start_offset..item.end - fragment.start_offset)
174194
.collect();
175-
Snippet {
176-
fragment: fragment_text.to_string(),
177-
highlighted,
178-
}
195+
Snippet::new(fragment_text, highlighted)
179196
} else {
180-
// when there no fragments to chose from,
181-
// for now create a empty snippet
182-
Snippet {
183-
fragment: String::new(),
184-
highlighted: vec![],
185-
}
197+
// When there are no fragments to chose from,
198+
// for now create an empty snippet.
199+
Snippet::empty()
186200
}
187201
}
188202

@@ -673,4 +687,22 @@ Survey in 2016, 2017, and 2018."#;
673687
assert_eq!(snippet.fragment, "abc");
674688
assert_eq!(snippet.to_html(), "<b>abc</b>");
675689
}
690+
691+
#[test]
692+
fn test_snippet_generator_custom_highlighted_elements() {
693+
let terms = btreemap! { String::from("rust") => 1.0, String::from("language") => 0.9 };
694+
let fragments = search_fragments(&From::from(SimpleTokenizer), TEST_TEXT, &terms, 100);
695+
let mut snippet = select_best_fragment_combination(&fragments[..], TEST_TEXT);
696+
assert_eq!(
697+
snippet.to_html(),
698+
"<b>Rust</b> is a systems programming <b>language</b> sponsored by\nMozilla which \
699+
describes it as a &quot;safe"
700+
);
701+
snippet.set_snippet_prefix_postfix("<q class=\"super\">", "</q>");
702+
assert_eq!(
703+
snippet.to_html(),
704+
"<q class=\"super\">Rust</q> is a systems programming <q class=\"super\">language</q> \
705+
sponsored by\nMozilla which describes it as a &quot;safe"
706+
);
707+
}
676708
}

0 commit comments

Comments
 (0)