Skip to content

Commit a8044bb

Browse files
committed
Handle multi-byte single-value encodings in conversion
- fixes the problem described in bug #684
1 parent 08b7929 commit a8044bb

File tree

2 files changed

+14
-0
lines changed

2 files changed

+14
-0
lines changed

dcmdata/libsrc/dcspchrs.cc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -587,10 +587,20 @@ OFCondition DcmSpecificCharacterSet::convertStringWithoutCodeExtensions(const ch
587587
size_t pos = 0;
588588
const char *firstChar = fromString;
589589
const char *currentChar = fromString;
590+
const bool isMultiByte = isNonAsciiConformMultiByteSingleValueCharacterSet(SourceCharacterSet);
591+
590592
// iterate over all characters of the string (as long as there is no error)
591593
while ((pos < fromLength) && status.good())
592594
{
593595
const char c0 = *currentChar++;
596+
if (isMultiByte && (c0 & 0x80) != 0)
597+
{
598+
// this is a 2-byte character or the first or second part
599+
// of a 4-byte character - skip the next byte
600+
currentChar++;
601+
pos += 2;
602+
continue;
603+
}
594604
// check for characters HT, LF, FF, CR or any other specified delimiter
595605
const OFBool isDelimiter = ((c0 == '\011') || (c0 == '\012') || (c0 == '\014') || (c0 == '\015') ||
596606
(delimiters.find(c0) != OFString_npos));

dcmdata/tests/tspchrs.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,10 @@ OFTEST(dcmdata_specificCharacterSet_3)
155155
OFCHECK(converter.selectCharacterSet("GB18030").good());
156156
OFCHECK(converter.convertString("Wang^XiaoDong=\315\365^\320\241\266\253=", resultStr, delimiters).good());
157157
OFCHECK_EQUAL(resultStr, "Wang^XiaoDong=\347\216\213^\345\260\217\344\270\234=");
158+
// check that a byte that looks like a delimiter but is part of a multi-byte character is not treated as a delimiter
159+
// 0x5c is the byte for a backslash in single-byte encodings, but here it is the second byte of each of two 2-byte CJK ideographs
160+
OFCHECK(converter.convertString("Noriwa=\x81\x5c\x82\x5c", resultStr, delimiters).good());
161+
OFCHECK_EQUAL(resultStr, "Noriwa=\xe4\xb9\x97\xe4\xbf\x93");
158162
// check whether string conversion from Chinese language to UTF-8 works
159163
// example taken from DICOM PS 3.5 Annex K.2
160164
OFCHECK(converter.selectCharacterSet("\\ISO 2022 IR 58").good());

0 commit comments

Comments
 (0)