diff --git a/examples/utils.cc b/examples/utils.cc index cbf09b2..ba91266 100644 --- a/examples/utils.cc +++ b/examples/utils.cc @@ -243,53 +243,132 @@ static int StoreICCP(j_decompress_ptr dinfo, std::string* const iccp) { return 1; } +typedef bool (*Handler)(const uint8_t* src, size_t len, void* obj); + +static bool DataCopy(const uint8_t* src, size_t len, void* obj) { + std::string* const dst = static_cast(obj); + if (dst->empty()) { + dst->append(reinterpret_cast(src), len); + return true; + } + return false; +} + +struct XMPExt { + std::string* xmp; + std::string ext; + const uint8_t* guid; + size_t size; + bool ok; +}; + +static uint32_t Get32b(const uint8_t* const src) { + return (static_cast(src[0]) << 24) | + (static_cast(src[1]) << 16) | + (static_cast(src[2]) << 8) | + (static_cast(src[3]) << 0); +} + +static bool XMPMerge(const uint8_t* src, size_t len, void* obj) { + XMPExt* const xmp = static_cast(obj); + xmp->ok = false; + if (len < 40) return false; + if (xmp->guid == nullptr) { + xmp->guid = src + 0; + } else { + if (memcmp(src, xmp->guid, 32)) return false; + } + const size_t total = Get32b(src + 32); + const size_t offset = Get32b(src + 36); + if (xmp->xmp == nullptr) return false; + if (xmp->ext.empty()) { + xmp->ext.resize(total); + } else { + // check size mismatch + if (xmp->ext.size() != total) return false; + } + src += 40; + len -= 40; + if (len > total) return false; + if (offset + len > xmp->ext.size()) return false; + + memcpy(&xmp->ext[offset], src, len); + xmp->size += len; + if (xmp->size > total) return false; // some overlap + if (xmp->size == total) { + uint8_t guid[32]; + sjpeg::MD5Digest(xmp->ext).Get(guid); + if (memcmp(guid, xmp->guid, 32)) return false; + // find the xmpNote and GUID in the XMP chunk, verify size + const size_t note_pos = xmp->xmp->find("xmpNote:HasExtendedXMP=\""); + if (note_pos == std::string::npos) return false; + if (note_pos + 24 + 32 + 1 > xmp->xmp->size()) return false; + // compare the main GUID in the XMP chunk + const char* const main_guid = &(*xmp->xmp)[note_pos + 24]; + if (memcmp(main_guid, guid, 32)) return false; + // all good + xmp->ok = true; + } + return true; +} + // Returns true on success and false for memory errors and corrupt profiles. // The caller must use MetadataFree() on 'metadata' in all cases. -static int ExtractMetadataFromJPEG(j_decompress_ptr dinfo, +static bool ExtractMetadataFromJPEG(j_decompress_ptr dinfo, EncoderParam* const param) { - if (param == NULL) return true; + if (param == nullptr) return true; param->ResetMetadata(); + XMPExt xmp_ext = { ¶m->xmp, "", nullptr, 0, false }; const struct { int marker; const char* signature; size_t signature_length; - std::string* data; + void* data; + Handler handler; } metadata_map[] = { // Exif 2.2 Section 4.7.2 Interoperability Structure of APP1 ... - { JPEG_APP1, "Exif\0", 6, ¶m->exif }, + { JPEG_APP1, "Exif\0", 6, ¶m->exif, DataCopy }, // XMP Specification Part 3 Section 3 Embedding XMP Metadata ... #JPEG - // TODO(jzern) Add support for 'ExtendedXMP' - { JPEG_APP1, "http://ns.adobe.com/xap/1.0/", 29, ¶m->xmp }, - { 0, NULL, 0, 0 }, + { JPEG_APP1, "http://ns.adobe.com/xap/1.0/", 29, ¶m->xmp, DataCopy }, + // XMP Extended + { JPEG_APP1, "http://ns.adobe.com/xmp/extension/", 35, &xmp_ext, XMPMerge }, + // Fake ICC handler, even if ICC is treated separately. This is to prevent + // storing the ICC data as 'app_markers'. + { JPEG_APP2, "ICC_PROFILE", 12, nullptr, nullptr }, }; jpeg_saved_marker_ptr marker; // Treat ICC profiles separately as they may be segmented and out of order. - if (!StoreICCP(dinfo, ¶m->iccp)) return 0; + if (!StoreICCP(dinfo, ¶m->iccp)) return false; for (marker = dinfo->marker_list; marker != NULL; marker = marker->next) { - int i; - for (i = 0; metadata_map[i].marker != 0; ++i) { - if (marker->marker == metadata_map[i].marker && - marker->data_length > metadata_map[i].signature_length && - !memcmp(marker->data, metadata_map[i].signature, - metadata_map[i].signature_length)) { - std::string* const payload = metadata_map[i].data; - - if (payload->size() == 0) { - const char* marker_data = - reinterpret_cast(marker->data) + - metadata_map[i].signature_length; - const size_t marker_data_length = - marker->data_length - metadata_map[i].signature_length; - payload->append(marker_data, marker_data_length); - } else { - fprintf(stderr, "Ignoring additional '%s' marker\n", - metadata_map[i].signature); + bool found = false; + for (const auto& m : metadata_map) { + if (marker->marker == m.marker && + marker->data_length > m.signature_length && + !memcmp(marker->data, m.signature, m.signature_length)) { + const uint8_t* const data = marker->data + m.signature_length; + const size_t data_length = marker->data_length - m.signature_length; + if (m.handler != nullptr && !m.handler(data, data_length, m.data)) { + fprintf(stderr, "Ignoring '%s' marker\n", m.signature); } + found = true; + break; } } - } - return 1; + // append to app_markers + if (!found) { + char header[4]; + header[0] = 0xff; + header[1] = marker->marker; + header[2] = ((marker->data_length + 2) >> 8) & 0xff; + header[3] = ((marker->data_length + 2) >> 0) & 0xff; + param->app_markers.append(header, sizeof(header)); + param->app_markers.append(reinterpret_cast(marker->data), + marker->data_length); + } + } + if (xmp_ext.ok) param->xmp += xmp_ext.ext; + return true; } #undef JPEG_APP1 diff --git a/src/dichotomy.cc b/src/dichotomy.cc index 44a28d7..a05c1ff 100644 --- a/src/dichotomy.cc +++ b/src/dichotomy.cc @@ -207,6 +207,9 @@ size_t Encoder::HeaderSize() const { } if (xmp_.size() > 0) { size += 2 + 2 + 29 + xmp_.size(); + if (xmp_.size() > 65533) { // XMPExtended + size += (xmp_.size() / 65458 + 1) * 40; + } } size += 2 * 65 + 2 + 2; // DQT size += 8 + 3 * nb_comps_ + 2; // SOF diff --git a/src/enc.cc b/src/enc.cc index 2037a69..f230ba6 100644 --- a/src/enc.cc +++ b/src/enc.cc @@ -2102,8 +2102,9 @@ void EncoderParam::SetMinQuantization(const uint8_t m[2][64], void EncoderParam::ResetMetadata() { iccp.clear(); exif.clear(); - xmp.clear(); app_markers.clear(); + xmp.clear(); + xmp_split_point = 0u; } bool Encoder::InitFromParam(const EncoderParam& param) { @@ -2126,8 +2127,9 @@ bool Encoder::InitFromParam(const EncoderParam& param) { SetMetadata(param.iccp, Encoder::ICC); SetMetadata(param.exif, Encoder::EXIF); - SetMetadata(param.xmp, Encoder::XMP); SetMetadata(param.app_markers, Encoder::MARKERS); + SetMetadata(param.xmp, Encoder::XMP); + xmp_split_ = param.xmp_split_point; passes_ = (param.passes < 1) ? 1 : (param.passes > 20) ? 20 : param.passes; if (passes_ > 1) { diff --git a/src/headers.cc b/src/headers.cc index 7cce007..f8963cf 100644 --- a/src/headers.cc +++ b/src/headers.cc @@ -20,11 +20,23 @@ #include #include #include +#include #include "sjpegi.h" +#include "md5sum.h" namespace sjpeg { +void Encoder::Put16b(uint32_t size) { + bw_.PutByte((size >> 8) & 0xff); + bw_.PutByte((size >> 0) & 0xff); +} + +void Encoder::Put32b(uint32_t size) { + Put16b(size >> 16); + Put16b(size >> 0); +} + //////////////////////////////////////////////////////////////////////////////// // Headers // @@ -65,10 +77,8 @@ bool Encoder::WriteEXIF(const std::string& data) { if (data_size > 0xffff) return false; ok_ = ok_ && bw_.Reserve(data_size + 2); if (!ok_) return false; - bw_.PutByte(0xff); - bw_.PutByte(0xe1); - bw_.PutByte((data_size >> 8) & 0xff); - bw_.PutByte((data_size >> 0) & 0xff); + Put16b(0xffe1); + Put16b(data_size); bw_.PutBytes(kEXIF, kEXIF_len); bw_.PutBytes(reinterpret_cast(data.data()), data.size()); return true; @@ -80,19 +90,17 @@ bool Encoder::WriteICCP(const std::string& data) { const uint8_t* ptr = reinterpret_cast(data.data()); const uint8_t kICCP[] = "ICC_PROFILE"; const size_t kICCP_len = 12; // includes the \0 - const size_t chunk_size_max = 0xffff - kICCP_len - 4; - size_t max_chunk = (data_size + chunk_size_max - 1) / chunk_size_max; + const size_t kMaxChunkSize = 0xffff - kICCP_len - 4; + size_t max_chunk = (data_size + kMaxChunkSize - 1) / kMaxChunkSize; if (max_chunk >= 256) return false; size_t seq = 1; while (data_size > 0) { - size_t size = data_size; - if (size > chunk_size_max) size = chunk_size_max; - ok_ = ok_ && bw_.Reserve(size + kICCP_len + 4 + 2); + const size_t size = std::min(kMaxChunkSize, data_size); + const size_t total_size = size + kICCP_len + 4; + ok_ = ok_ && bw_.Reserve(total_size + 2); if (!ok_) return false; - bw_.PutByte(0xff); - bw_.PutByte(0xe2); - bw_.PutByte(((size + kICCP_len + 4) >> 8) & 0xff); - bw_.PutByte(((size + kICCP_len + 4) >> 0) & 0xff); + Put16b(0xffe2); + Put16b(total_size); bw_.PutBytes(kICCP, kICCP_len); bw_.PutByte(seq & 0xff); bw_.PutByte(max_chunk & 0xff); @@ -104,21 +112,71 @@ bool Encoder::WriteICCP(const std::string& data) { return true; } +bool Encoder::WriteXMPExtended(const std::string& data) { + const size_t kMainSize = 65503; + if (data.size() < kMainSize) return true; // too short! should be a main XMP + if (data.size() > (1u << 31)) return false; // too large + size_t split = (xmp_split_ == 0) ? kMainSize : xmp_split_; + split = std::min(split, data.size()); + // search for the extension tag + const size_t note_pos = data.find(std::string("xmpNote:HasExtendedXMP=\"")); + if (note_pos == std::string::npos) return false; // no extension! + if (note_pos + 24 + 32 + 1 > split) return false; // ill-formed + if (data[note_pos + 24 + 32] != '\"') return false; + // split in main / extension + std::string main_data(&data[0], split); + std::string ext_data(&data[split], data.size() - split); + // compute GUID + uint8_t* const guid = reinterpret_cast(&main_data[note_pos + 24]); + MD5Digest(ext_data).Get(guid); + + // Main chunk. + if (!WriteXMP(main_data)) return false; + + // Extended chunks. + const uint8_t kXMPExt[] = "http://ns.adobe.com/xmp/extension/"; + const size_t kXMPExt_size = sizeof(kXMPExt); + const size_t kBufSize = 65458; + + // 40 = 32 bytes for GUID + 4 bytes for size + 4 bytes for position + const size_t kHeaderSize = kXMPExt_size + 40; + const size_t num_chunks = ext_data.size() / kBufSize + 1; + const size_t data_size = num_chunks * (kHeaderSize + 2 + 2) + ext_data.size(); + ok_ = ok_ && bw_.Reserve(data_size); + if (!ok_) return false; + size_t read_pos = 0, write_pos = 0; + for (uint32_t chunk = 0; chunk < num_chunks; ++chunk) { + const uint32_t write_size = std::min(kBufSize, ext_data.size() - read_pos); + Put16b(0xffe1); // APP1 + Put16b(2 + kHeaderSize + write_size); + bw_.PutBytes(kXMPExt, kXMPExt_size); + bw_.PutBytes(guid, 32u); + Put32b(ext_data.size()); // total size, not chunk size! + Put32b(read_pos); + bw_.PutBytes(reinterpret_cast(&ext_data[read_pos]), + write_size); + read_pos += write_size; + write_pos += 2 + (2 + kHeaderSize + write_size); + } + return (write_pos == data_size); +} + bool Encoder::WriteXMP(const std::string& data) { if (data.size() == 0) return true; const uint8_t kXMP[] = "http://ns.adobe.com/xap/1.0/"; - const size_t kXMP_size = 29; + const size_t kXMP_size = sizeof(kXMP); const size_t data_size = 2 + data.size() + kXMP_size; - if (data_size > 0xffff) return false; // error + if (data_size <= 0xffff) { // don't use extended XMP if small enough data ok_ = ok_ && bw_.Reserve(data_size + 2); if (!ok_) return false; - bw_.PutByte(0xff); - bw_.PutByte(0xe1); - bw_.PutByte((data_size >> 8) & 0xff); - bw_.PutByte((data_size >> 0) & 0xff); + Put16b(0xffe1); + Put16b(data_size); bw_.PutBytes(kXMP, kXMP_size); bw_.PutBytes(reinterpret_cast(data.data()), data.size()); return true; + } + // need to split into main-chunk + extended sections + return WriteXMPExtended(data); } void Encoder::WriteDQT() { @@ -170,10 +228,8 @@ void Encoder::WriteDHT() { assert(data_size <= 255); ok_ = ok_ && bw_.Reserve(data_size + 2); if (!ok_) return; - bw_.PutByte(0xff); - bw_.PutByte(0xc4); - bw_.PutByte(0x00 /*data_size >> 8*/); - bw_.PutByte(data_size); + Put16b(0xffc4); + Put16b(data_size); bw_.PutByte((type << 4) | c); bw_.PutBytes(h->bits_, 16); bw_.PutBytes(h->syms_, h->nb_syms_); @@ -208,8 +264,7 @@ void Encoder::WriteEOI() { // EOI ok_ = ok_ && bw_.Reserve(2); if (!ok_) return; // append EOI - bw_.PutByte(0xff); - bw_.PutByte(0xd9); + Put16b(0xffd9); } //////////////////////////////////////////////////////////////////////////////// diff --git a/src/sjpeg.h b/src/sjpeg.h index bb779ca..674f155 100644 --- a/src/sjpeg.h +++ b/src/sjpeg.h @@ -240,15 +240,22 @@ struct EncoderParam { // if null, a default implementation will be used sjpeg::SearchHook* search_hook; - // metadata: extra EXIF/XMP/ICCP data that will be embedded in + // metadata: extra EXIF/XMP/XMPExt/ICCP data that will be embedded in // APP1 or APP2 markers. They should contain only the raw payload and not // the prefixes ("Exif\0", "ICC_PROFILE", etc...). These will be added - // automatically during encoding. + // automatically during encoding. If XMP data is larger than 65504 bytes, + // XMPExtended chunks will be used. + // It is the caller's responsibility to make sure that the xmp data contains + // the necessary extension syntax (xmpNote:HasExtended="...") somewhere + // in the first 65504 bytes. The MD5 digest will be inserted at the correct + // location. If xmp_split_point is not 0, it will be used to split the xmp + // data chunk. // Conversely, the content of app_markers is written as is, right after APP0. std::string exif; - std::string xmp; std::string iccp; std::string app_markers; + std::string xmp; + uint16_t xmp_split_point = 0u; // user-supplied split point for extended XMP void ResetMetadata(); // clears the above // Memory manager used by the codec. If null, default one will be used. diff --git a/src/sjpegi.h b/src/sjpegi.h index 127f475..79eccf1 100644 --- a/src/sjpegi.h +++ b/src/sjpegi.h @@ -221,11 +221,15 @@ struct Encoder { private: bool CheckBuffers(); // returns false in case of memory alloc error + void Put16b(uint32_t size); + void Put32b(uint32_t size); + void WriteAPP0(); bool WriteAPPMarkers(const std::string& data); bool WriteEXIF(const std::string& data); bool WriteICCP(const std::string& data); bool WriteXMP(const std::string& data); + bool WriteXMPExtended(const std::string& data); void WriteDQT(); void WriteSOF(); void WriteDHT(); @@ -342,6 +346,7 @@ struct Encoder { sjpeg::BitWriter bw_; // output buffer std::string iccp_, xmp_, exif_, app_markers_; // metadata + uint16_t xmp_split_; // user-supplied split point for extended metadata // compression tools. See sjpeg.h for description of methods. bool optimize_size_; // Huffman-optimize the codes (method 0, 3)