File tree Expand file tree Collapse file tree 2 files changed +14
-0
lines changed
Expand file tree Collapse file tree 2 files changed +14
-0
lines changed Original file line number Diff line number Diff line change @@ -587,10 +587,20 @@ OFCondition DcmSpecificCharacterSet::convertStringWithoutCodeExtensions(const ch
587587 size_t pos = 0 ;
588588 const char *firstChar = fromString;
589589 const char *currentChar = fromString;
590+ const bool isMultiByte = isNonAsciiConformMultiByteSingleValueCharacterSet (SourceCharacterSet);
591+
590592 // iterate over all characters of the string (as long as there is no error)
591593 while ((pos < fromLength) && status.good ())
592594 {
593595 const char c0 = *currentChar++;
596+ if (isMultiByte && (c0 & 0x80 ) != 0 )
597+ {
598+ // high bit set: this is the lead byte of a 2-byte character, or of the first or second
599+ // byte pair of a 4-byte character - skip its trail byte so it is never tested as a delimiter
600+ currentChar++;
601+ pos += 2 ;
602+ continue ;
603+ }
594604 // check for characters HT, LF, FF, CR or any other specified delimiter
595605 const OFBool isDelimiter = ((c0 == ' \011 ' ) || (c0 == ' \012 ' ) || (c0 == ' \014 ' ) || (c0 == ' \015 ' ) ||
596606 (delimiters.find (c0) != OFString_npos));
Original file line number Diff line number Diff line change @@ -155,6 +155,10 @@ OFTEST(dcmdata_specificCharacterSet_3)
155155 OFCHECK (converter.selectCharacterSet (" GB18030" ).good ());
156156 OFCHECK (converter.convertString (" Wang^XiaoDong=\315\365 ^\320\241\266\253 =" , resultStr, delimiters).good ());
157157 OFCHECK_EQUAL (resultStr, " Wang^XiaoDong=\347\216\213 ^\345\260\217\344\270\234 =" );
158+ // check that a byte which looks like a delimiter but lies inside a multi-byte character is not treated as a delimiter
159+ // 0x5c is the byte for a backslash in single-byte encodings, but here it is the second byte of each of two double-byte Kanji characters
160+ OFCHECK (converter.convertString (" Noriwa=\x81\x5c\x82\x5c " , resultStr, delimiters).good ());
161+ OFCHECK_EQUAL (resultStr, " Noriwa=\xe4\xb9\x97\xe4\xbf\x93 " );
158162 // check whether string conversion from Chinese language to UTF-8 works
159163 // example taken from DICOM PS 3.5 Annex K.2
160164 OFCHECK (converter.selectCharacterSet (" \\ ISO 2022 IR 58" ).good ());
You can’t perform that action at this time.
0 commit comments