Skip to content

Commit b38064f

Browse files
committed
Fix input encoding.
1 parent 0328a1f commit b38064f

File tree

3 files changed

+47
-33
lines changed

3 files changed

+47
-33
lines changed

encoding.cpp

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -213,22 +213,22 @@ unsigned char unicode_to_koi7(unsigned short val)
213213
/* `abcdefg */ 0, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, // ABCDEFG
214214
/* hijklmno */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, // HIJKLMNO
215215
/* pqrstuvw */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, // PQRSTUVW
216-
/* xyz{|}~ */ 0x58, 0x59, 0x5a, 0, 0, 0, 0, 0, // XYZ
216+
/* xyz{|}~ */ 0x58, 0x59, 0x5a, 0, 0x3f, 0, 0, 0, // XYZ |
217217
/* 80 - 87 */ 0, 0, 0, 0, 0, 0, 0, 0,
218218
/* 88 - 8f */ 0, 0, 0, 0, 0, 0, 0, 0,
219219
/* 90 - 97 */ 0, 0, 0, 0, 0, 0, 0, 0,
220220
/* 98 - 9f */ 0, 0, 0, 0, 0, 0, 0, 0,
221221
/* a0 - a7 */ 0, 0, 0, 0, 0, 0, 0, 0,
222-
/* a8 - af */ 0, 0, 0, 0, 0, 0, 0, 0,
223-
/* b0 - b7 */ 0, 0, 0, 0, 0, 0, 0, 0,
222+
/* a8 - af */ 0, 0, 0, 0, 0x1f, 0, 0, 0, // ¬
223+
/* b0 - b7 */ 0x19, 0, 0, 0, 0, 0, 0, 0, // °
224224
/* b8 - bf */ 0, 0, 0, 0, 0, 0, 0, 0,
225225
/* c0 - c7 */ 0, 0, 0, 0, 0, 0, 0, 0,
226226
/* c8 - cf */ 0, 0, 0, 0, 0, 0, 0, 0,
227-
/* d0 - d7 */ 0, 0, 0, 0, 0, 0, 0, 0,
227+
/* d0 - d7 */ 0, 0, 0, 0, 0, 0, 0, 0x06, // ×
228228
/* d8 - df */ 0, 0, 0, 0, 0, 0, 0, 0,
229229
/* e0 - e7 */ 0, 0, 0, 0, 0, 0, 0, 0,
230230
/* e8 - ef */ 0, 0, 0, 0, 0, 0, 0, 0,
231-
/* f0 - f7 */ 0, 0, 0, 0, 0, 0, 0, 0,
231+
/* f0 - f7 */ 0, 0, 0, 0, 0, 0, 0, 0x1a, // ÷
232232
/* f8 - ff */ 0, 0, 0, 0, 0, 0, 0, 0,
233233
// clang-format on
234234
};
@@ -390,15 +390,17 @@ unsigned char unicode_to_koi7(unsigned short val)
390390
case 0x20:
391391
switch ((unsigned char)val) {
392392
case 0x15:
393-
return '-';
393+
return '\25'; // ―
394394
case 0x18:
395-
return 0; // `
395+
return '\20'; // ‘
396396
case 0x19:
397-
return '\'';
397+
return '\33'; // ’
398398
case 0x28:
399399
return 0x0a;
400400
case 0x32:
401401
return '\'';
402+
case 0x3e:
403+
return '^';
402404
}
403405
break;
404406
case 0x21:
@@ -414,14 +416,28 @@ unsigned char unicode_to_koi7(unsigned short val)
414416
case 0x27:
415417
return '^';
416418
case 0x28:
417-
return 'v';
419+
return '\36'; // ∨
418420
case 0x60:
419421
return '#';
422+
case 0x61:
423+
return '\35'; // ≡
424+
case 0x64:
425+
return '\16'; // ≤
426+
case 0x65:
427+
return '\17'; // ≥
428+
case 0x83:
429+
return '\34'; // ⊃
430+
}
431+
break;
432+
case 0x23:
433+
switch ((unsigned char)val) {
434+
case 0xe8:
435+
return '\27'; // ⏨
420436
}
421437
break;
422438
case 0x25:
423439
switch ((unsigned char)val) {
424-
case 0xca:
440+
case 0xc7:
425441
return '$';
426442
}
427443
break;

tests/cosy_test.cpp

Lines changed: 20 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -135,28 +135,26 @@ TEST(unit, utf8_to_koi7)
135135
EXPECT_EQ(utf8_to_koi7("ПЯPCTYЖBЬЫЗШЭЩЧ ", 16), "pqPCTYvBxyz{|}~ ");
136136

137137
EXPECT_EQ(utf8_to_koi7("Ъ", 1), "\5");
138-
//TODO: EXPECT_EQ(utf8_to_koi7("×", 1), "\6");
139-
//TODO: EXPECT_EQ(utf8_to_koi7("≤", 1), "\16");
140-
//TODO: EXPECT_EQ(utf8_to_koi7("≥", 1), "\17");
141-
//TODO: EXPECT_EQ(utf8_to_koi7("‘", 1), "\20");
142-
//TODO: EXPECT_EQ(utf8_to_koi7("―", 1), "\25");
143-
//TODO: EXPECT_EQ(utf8_to_koi7("↑", 1), "\26");
144-
//TODO: EXPECT_EQ(utf8_to_koi7("⏨", 1), "\27");
145-
146-
//TODO: EXPECT_EQ(utf8_to_koi7("≠", 1), "\30");
147-
//TODO: EXPECT_EQ(utf8_to_koi7("°", 1), "\31");
148-
//TODO: EXPECT_EQ(utf8_to_koi7("÷", 1), "\32");
149-
//TODO: EXPECT_EQ(utf8_to_koi7("’", 1), "\33");
150-
//TODO: EXPECT_EQ(utf8_to_koi7("⊃", 1), "\34");
151-
//TODO: EXPECT_EQ(utf8_to_koi7("≡", 1), "\35");
152-
//TODO: EXPECT_EQ(utf8_to_koi7("∨", 1), "\36");
153-
//TODO: EXPECT_EQ(utf8_to_koi7("¬", 1), "\37");
154-
155-
EXPECT_EQ(utf8_to_koi7("≠", 1), "#");
156-
//TODO: EXPECT_EQ(utf8_to_koi7("◇", 1), "$");
138+
EXPECT_EQ(utf8_to_koi7("×", 1), "\6");
139+
EXPECT_EQ(utf8_to_koi7("≤", 1), "\16");
140+
EXPECT_EQ(utf8_to_koi7("≥", 1), "\17");
141+
EXPECT_EQ(utf8_to_koi7("‘", 1), "\20");
142+
EXPECT_EQ(utf8_to_koi7("―", 1), "\25");
143+
EXPECT_EQ(utf8_to_koi7("↑", 1), "@"); // TODO: must be "\26"
144+
EXPECT_EQ(utf8_to_koi7("⏨", 1), "\27");
145+
146+
EXPECT_EQ(utf8_to_koi7("≠", 1), "#"); // TODO: must be "\30"
147+
EXPECT_EQ(utf8_to_koi7("°", 1), "\31");
148+
EXPECT_EQ(utf8_to_koi7("÷", 1), "\32");
149+
EXPECT_EQ(utf8_to_koi7("’", 1), "\33");
150+
EXPECT_EQ(utf8_to_koi7("⊃", 1), "\34");
151+
EXPECT_EQ(utf8_to_koi7("≡", 1), "\35");
152+
EXPECT_EQ(utf8_to_koi7("∨", 1), "\36");
153+
EXPECT_EQ(utf8_to_koi7("¬", 1), "\37");
154+
155+
EXPECT_EQ(utf8_to_koi7("◇", 1), "$");
157156
EXPECT_EQ(utf8_to_koi7("∧", 1), "^");
158157
EXPECT_EQ(utf8_to_koi7("′", 1), "'");
159-
EXPECT_EQ(utf8_to_koi7("↑", 1), "@");
160-
//TODO: EXPECT_EQ(utf8_to_koi7("‾", 1), "^");
161-
//TODO: EXPECT_EQ(utf8_to_koi7("|", 1), "|");
158+
EXPECT_EQ(utf8_to_koi7("‾", 1), "^");
159+
EXPECT_EQ(utf8_to_koi7("|", 1), "?"); // TODO: is this OK?
162160
}

tests/session_test.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -825,7 +825,7 @@ TEST_F(dubna_session, file_scratch)
825825
*edit
826826
*RO
827827
*W:44
828-
Twas brillig, and the slithy toves
828+
'Twas brillig, and the slithy toves
829829
Did gyre and gimble in the wabe:
830830
All mimsy were the borogoves,
831831
And the mome raths outgrabe.

0 commit comments

Comments
 (0)