Skip to content

Commit eb7a8b5

Browse files
authored
Collation rust updates - handle locales and other options (#482)
* ICU4X collation - mark rules as unsupported. Apply strength option. * Cargo fmt * Update collator.rs * Fix selection on left mouse to unescape HTML entities * Fix detail copy on left click to unescape HTML entities * Add comment to fix locale handling * fmt * Adding locale in ICU4X collation * Output the comparison numeric value * cargo fmt * Small refactor * Gemini suggestions * Fix boolean * Handling more collation options * Fix format
1 parent 53f063f commit eb7a8b5

File tree

1 file changed

+180
-19
lines changed

1 file changed

+180
-19
lines changed

executors/rust/src/collator.rs

Lines changed: 180 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,29 +8,61 @@ use icu::collator::*;
88
#[cfg(not(any(ver = "1.3", ver = "1.4", ver = "1.5", ver = "2.0-beta1")))]
99
use icu::collator::options::*;
1010

11-
use super::compat::{locale, pref};
11+
#[cfg(not(any(ver = "1.3", ver = "1.4", ver = "1.5", ver = "2.0-beta1")))]
12+
use icu::collator::preferences::{CollationCaseFirst as CaseFirst, CollationNumericOrdering};
13+
14+
use super::compat::{pref, Locale};
1215

1316
// Function runs comparison using collator
1417
pub fn run_collation_test(json_obj: &Value) -> Result<Value, String> {
15-
// TODO: Handle errors of missing values and failures.
1618
let label = &json_obj["label"].as_str().unwrap();
17-
let ignore_punctuation: &Option<bool> = &json_obj["ignorePunctuation"].as_bool();
19+
let ignore_punctuation: Option<bool> = json_obj["ignorePunctuation"].as_bool();
1820
let str1: &str = json_obj["s1"].as_str().unwrap();
1921
let str2: &str = json_obj["s2"].as_str().unwrap();
2022

21-
// This may be missing
23+
// These fields may be missing in tests
2224
let compare_option: Option<&str> = json_obj["compare_type"].as_str();
23-
2425
let strength_option: Option<&str> = json_obj["strength"].as_str();
25-
2626
let rules: Option<&str> = json_obj["rules"].as_str();
2727

28+
let alternate_option: Option<&str> = json_obj["alternate"].as_str();
29+
let case_first_option: Option<&str> = json_obj["caseFirst"].as_str();
30+
let case_level_option: Option<&str> = json_obj["caseLevel"].as_str();
31+
let numeric_option: Option<&str> = json_obj["numeric"].as_str();
32+
let reorder_option: Option<&str> = json_obj["reorder"].as_str();
33+
let backwards_option: Option<&str> = json_obj["backwards"].as_str();
34+
let max_variable_option: Option<&str> = json_obj["maxVariable"].as_str();
35+
2836
#[cfg(any(ver = "1.3", ver = "1.4", ver = "1.5"))]
2937
let mut options = CollatorOptions::new();
3038
#[cfg(not(any(ver = "1.3", ver = "1.4", ver = "1.5")))]
3139
let mut options = CollatorOptions::default();
3240

33-
// TODO: Get and apply locale if given. Else use "und" or "en"
41+
// Apply locale if given. Else use default locale.
42+
// Replace "root" with default locale
43+
let locale_name_opt = json_obj
44+
.get("locale")
45+
.map(|json_val| json_val.as_str().unwrap());
46+
let langid = match locale_name_opt {
47+
Some("root") | None => Locale::default(),
48+
Some(other) => match other.parse() {
49+
Ok(l) => l,
50+
Err(_) => {
51+
return Ok(json!({
52+
"label": label,
53+
"error_detail": other,
54+
"unsupported": "Unsupported locale",
55+
"error_type": "unsupported",
56+
}))
57+
}
58+
},
59+
};
60+
61+
#[cfg(any(ver = "1.3", ver = "1.4", ver = "1.5"))]
62+
let preferences: &icu_provider::DataLocale = pref!(&langid);
63+
#[cfg(not(any(ver = "1.3", ver = "1.4", ver = "1.5")))]
64+
#[cfg_attr(ver = "2.0-beta1", allow(unused_mut))]
65+
let mut preferences: CollatorPreferences = pref!(&langid);
3466

3567
// Rules not yet supported.
3668
if rules.is_some() {
@@ -42,6 +74,16 @@ pub fn run_collation_test(json_obj: &Value) -> Result<Value, String> {
4274
}));
4375
}
4476

77+
if reorder_option.is_some() {
78+
// Reordering is only supported by the -kr option of a locale
79+
// See https://github.com/unicode-org/icu4x/issues/6033
80+
return Ok(json!({
81+
"label": label,
82+
"unsupported": "reorder scripts",
83+
"error_type": "unsupported",
84+
}));
85+
}
86+
4587
// Use compare_type to get < or =.
4688
let compare_symbol = compare_option.and_then(|c| c.get(0..1)).unwrap_or("<");
4789

@@ -63,18 +105,135 @@ pub fn run_collation_test(json_obj: &Value) -> Result<Value, String> {
63105
};
64106
};
65107

108+
if let Some(case_level) = case_level_option {
109+
options.case_level = match case_level {
110+
"off" => Some(CaseLevel::Off),
111+
"on" => Some(CaseLevel::On),
112+
_ => {
113+
return Ok(json!({
114+
"label": label,
115+
"error_detail": {"caseLevel": case_level},
116+
"unsupported": "caseLevel",
117+
"error_type": "unsupported",
118+
}));
119+
}
120+
}
121+
};
122+
123+
if let Some(case_first) = case_first_option {
124+
#[cfg(not(any(ver = "1.3", ver = "1.4", ver = "1.5", ver = "2.0-beta1")))]
125+
{
126+
preferences.case_first = match case_first {
127+
"off" => Some(CaseFirst::False),
128+
"lower" => Some(CaseFirst::Lower),
129+
"upper" => Some(CaseFirst::Upper),
130+
_ => {
131+
return Ok(json!({
132+
"label": label,
133+
"error_detail": {"caseFirst": case_first},
134+
"unsupported": "caseFirst",
135+
"error_type": "unsupported",
136+
}));
137+
}
138+
}
139+
}
140+
#[cfg(any(ver = "1.3", ver = "1.4", ver = "1.5", ver = "2.0-beta1"))]
141+
{
142+
options.case_first = match case_first {
143+
"off" => Some(CaseFirst::Off),
144+
"lower" => Some(CaseFirst::LowerFirst),
145+
"upper" => Some(CaseFirst::UpperFirst),
146+
_ => {
147+
return Ok(json!({
148+
"label": label,
149+
"error_detail": {"caseFirst": case_first},
150+
"unsupported": "caseFirst",
151+
"error_type": "unsupported",
152+
}));
153+
}
154+
}
155+
}
156+
};
157+
158+
if let Some(backwards) = backwards_option {
159+
options.backward_second_level = match backwards {
160+
"off" => Some(BackwardSecondLevel::Off),
161+
"on" => Some(BackwardSecondLevel::On),
162+
_ => {
163+
return Ok(json!({
164+
"label": label,
165+
"error_detail": {"backwards": backwards},
166+
"unsupported": "backwards",
167+
"error_type": "unsupported",
168+
}));
169+
}
170+
}
171+
};
172+
173+
// Numeric sort order.
174+
// CollatorPreferences in beta2 vs. Enum Numeric
175+
if let Some(numeric) = numeric_option {
176+
#[cfg(not(any(ver = "1.3", ver = "1.4", ver = "1.5", ver = "2.0-beta1")))]
177+
{
178+
preferences.numeric_ordering = match numeric {
179+
"off" => Some(CollationNumericOrdering::False),
180+
"on" => Some(CollationNumericOrdering::True),
181+
_ => {
182+
return Ok(json!({
183+
"label": label,
184+
"error_detail": {"numeric": numeric},
185+
"unsupported": "numeric",
186+
"error_type": "unsupported",
187+
}));
188+
}
189+
}
190+
};
191+
// !!! TODO: handle before 2.0beta2
192+
};
193+
194+
if let Some(alternate) = alternate_option {
195+
options.alternate_handling = match alternate {
196+
"shifted" => Some(AlternateHandling::Shifted),
197+
"non-ignorable" => Some(AlternateHandling::NonIgnorable),
198+
_ => {
199+
return Ok(json!({
200+
"label": label,
201+
"error_detail": {"alternate": alternate},
202+
"unsupported": "alternate",
203+
"error_type": "unsupported",
204+
}));
205+
}
206+
}
207+
};
208+
209+
if let Some(max_variable) = max_variable_option {
210+
options.max_variable = match max_variable {
211+
"space" => Some(MaxVariable::Space),
212+
"punctuation" => Some(MaxVariable::Punctuation),
213+
"symbol" => Some(MaxVariable::Symbol),
214+
"currency" => Some(MaxVariable::Currency),
215+
_ => {
216+
return Ok(json!({
217+
"label": label,
218+
"error_detail": {"maxVariable": max_variable},
219+
"unsupported": "maxVariable",
220+
"error_type": "unsupported",
221+
}));
222+
}
223+
}
224+
};
225+
66226
// Ignore punctuation only if using shifted test.
67227
if let Some(ip) = ignore_punctuation {
68-
if *ip {
228+
if ip {
69229
options.alternate_handling = Some(AlternateHandling::Shifted);
70230
}
71231
}
72232

73233
// TODO !! Iterate to find actual level of comparison, then look
74234
// at compare type (1, 2, 3, 4, i, c) to see if it matches
75235

76-
let collator = Collator::try_new(pref!(locale!("en")), options).unwrap();
77-
236+
let collator = Collator::try_new(preferences, options).map_err(|e| e.to_string())?;
78237
let comparison = collator.compare(str1, str2);
79238

80239
let result_string = match compare_symbol {
@@ -92,19 +251,21 @@ pub fn run_collation_test(json_obj: &Value) -> Result<Value, String> {
92251
}
93252
};
94253

95-
let mut comparison_number: i16 = 0;
96-
if comparison == Ordering::Less {
97-
comparison_number = -1;
98-
} else if comparison == Ordering::Greater {
99-
comparison_number = 1;
100-
}
254+
let comparison_number: i16 = match comparison {
255+
Ordering::Less => -1,
256+
Ordering::Greater => 1,
257+
Ordering::Equal => 0,
258+
};
101259

102-
// TODO: Convert comparison to "<", "=", or ">"
103260
let json_result = json!({
104261
"label": label,
105262
"result": result_string,
106-
"compare_result": comparison_number,
107-
"actual_options": format!("{options:?}")
263+
"actual_options": {
264+
"options": format!("{options:?}, {preferences:?}"),
265+
"compared_result": comparison_number,
266+
"s1": str1,
267+
"s2": str2,
268+
}
108269
});
109270
Ok(json_result)
110271
}

0 commit comments

Comments
 (0)