Skip to content

Commit ac47ca4

Browse files
committed
Avoid toLowerCasing İ (\u0130) - it changes to i̇ (\u0069 \u0307)
(closes osmlab#8261) This was tricky because you'd think that a case-insensitive regex (/i) would match both the uppercase and lowercase variants of this character, but it doesn't: the lowercase form of 'İ' (\u0130) is the two-code-point sequence 'i̇' (\u0069 \u0307) rather than a single character. Then I tried to match both variants with a regex like '^(İ|i̇)nşaat malları$', but toLowerCasing *that* regex in our file_tree writing code was changing the 'İ'. So for now, our build scripts just avoid toLowerCasing any string with an 'İ' in it.
1 parent 3f3a19b commit ac47ca4

File tree

5 files changed

+27
-19
lines changed

5 files changed

+27
-19
lines changed

data/brands/shop/doityourself.json

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
"path": "brands/shop/doityourself",
44
"exclude": {
55
"generic": [
6+
"^(İ|i̇)nşaat malları$",
67
"^(сантехника|стройматериалы)$",
78
"^doityourself$",
8-
"^i̇nşaat malları$",
99
"^будматеріали$",
1010
"^хоз(товары)$"
1111
],
@@ -736,16 +736,6 @@
736736
"shop": "doityourself"
737737
}
738738
},
739-
{
740-
"displayName": "İnşaat Malları",
741-
"id": "insaatmallari-092aaf",
742-
"locationSet": {"include": ["001"]},
743-
"tags": {
744-
"brand": "İnşaat Malları",
745-
"name": "İnşaat Malları",
746-
"shop": "doityourself"
747-
}
748-
},
749739
{
750740
"displayName": "jem & fix",
751741
"id": "jemandfix-27539e",

lib/file_tree.js

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -305,13 +305,18 @@ write: (cache) => {
305305
});
306306

307307

308-
function _clean(val) {
309-
if (typeof val !== 'string') return val;
310-
return val.trim();
308+
function _clean(s) {
309+
if (typeof s !== 'string') return s;
310+
return s.trim();
311311
}
312-
function _cleanLower(val) {
313-
if (typeof val !== 'string') return val;
314-
return val.trim().toLowerCase();
312+
313+
function _cleanLower(s) {
314+
if (typeof s !== 'string') return s;
315+
if (/İ/.test(s)) { // Avoid toLowerCasing this one, it changes - #8261
316+
return s.trim();
317+
} else {
318+
return s.trim().toLowerCase();
319+
}
315320
}
316321
},
317322

lib/simplify.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ export function simplify(str) {
99
return diacritics.remove(
1010
str
1111
.replace(/&/g, 'and')
12-
.replace(/İ/ig, 'i') // for BİM, İşbank - #5017
12+
.replace(/(İ|i̇)/ig, 'i') // for BİM, İşbank - #5017, #8261
1313
.replace(/[\s\-=_!"#%'*{},.\/:;?\(\)\[\]@\\$\^*+<>«»~`\u00a1\u00a7\u00b6\u00b7\u00bf\u037e\u0387\u055a-\u055f\u0589\u05c0\u05c3\u05c6\u05f3\u05f4\u0609\u060a\u060c\u060d\u061b\u061e\u061f\u066a-\u066d\u06d4\u0700-\u070d\u07f7-\u07f9\u0830-\u083e\u085e\u0964\u0965\u0970\u0af0\u0df4\u0e4f\u0e5a\u0e5b\u0f04-\u0f12\u0f14\u0f85\u0fd0-\u0fd4\u0fd9\u0fda\u104a-\u104f\u10fb\u1360-\u1368\u166d\u166e\u16eb-\u16ed\u1735\u1736\u17d4-\u17d6\u17d8-\u17da\u1800-\u1805\u1807-\u180a\u1944\u1945\u1a1e\u1a1f\u1aa0-\u1aa6\u1aa8-\u1aad\u1b5a-\u1b60\u1bfc-\u1bff\u1c3b-\u1c3f\u1c7e\u1c7f\u1cc0-\u1cc7\u1cd3\u2000-\u206f\u2cf9-\u2cfc\u2cfe\u2cff\u2d70\u2e00-\u2e7f\u3001-\u3003\u303d\u30fb\ua4fe\ua4ff\ua60d-\ua60f\ua673\ua67e\ua6f2-\ua6f7\ua874-\ua877\ua8ce\ua8cf\ua8f8-\ua8fa\ua92e\ua92f\ua95f\ua9c1-\ua9cd\ua9de\ua9df\uaa5c-\uaa5f\uaade\uaadf\uaaf0\uaaf1\uabeb\ufe10-\ufe16\ufe19\ufe30\ufe45\ufe46\ufe49-\ufe4c\ufe50-\ufe52\ufe54-\ufe57\ufe5f-\ufe61\ufe68\ufe6a\ufe6b\ufeff\uff01-\uff03\uff05-\uff07\uff0a\uff0c\uff0e\uff0f\uff1a\uff1b\uff1f\uff20\uff3c\uff61\uff64\uff65]+/g,'')
1414
.toLowerCase()
1515
);

scripts/build_index.js

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,14 @@ function loadConfig() {
116116
data.replacements = sortObject(data.replacements);
117117

118118
} else if (which === 'genericWords') {
119-
data.genericWords = data.genericWords.map(s => s.toLowerCase()).sort(withLocale);
119+
data.genericWords = data.genericWords.map(s => {
120+
if (/İ/.test(s)) { // Avoid toLowerCasing this one, it changes - #8261
121+
return s.trim();
122+
} else {
123+
return s.trim().toLowerCase();
124+
}
125+
})
126+
.sort(withLocale);
120127
}
121128

122129
// Lowercase and sort the files for consistency, save them that way.

tests/simplify.test.js

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,12 @@ test('simplify', t => {
4646
t.end();
4747
});
4848

49+
t.test('replaces İ (0130) or i̇ (0069 0307) with i', t => { // #5017, #8261 for examples
50+
t.equal(simplify('İnşaat'), 'insaat');
51+
t.equal(simplify('i̇nşaat'), 'insaat');
52+
t.end();
53+
});
54+
4955
t.test('returns empty string if no input', t => {
5056
t.equal(simplify(), '');
5157
t.equal(simplify(null), '');

0 commit comments

Comments
 (0)