Skip to content

Commit

Permalink
sjpeg: implement XMPExtended metadata writing + reading
Browse files Browse the repository at this point in the history
The spec is here (section 1.1.3.1):
 https://wwwimages2.adobe.com/content/dam/acom/en/devnet/xmp/pdfs/XMP%20SDK%20Release%20cc-2016-08/XMPSpecificationPart3.pdf

+ misc clean-up

Change-Id: I40aa21f90fe220ebec5b42fcc081b63c198c91f2
  • Loading branch information
Skal committed Mar 2, 2020
1 parent 8d23b44 commit fc8138a
Show file tree
Hide file tree
Showing 6 changed files with 209 additions and 58 deletions.
135 changes: 107 additions & 28 deletions examples/utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -243,53 +243,132 @@ static int StoreICCP(j_decompress_ptr dinfo, std::string* const iccp) {
return 1;
}

// Signature of a marker-payload handler: receives the payload bytes that follow
// the marker signature ('src', 'len') and an opaque destination object 'obj'.
// A handler returns false when it does not accept the payload.
using Handler = bool (*)(const uint8_t* src, size_t len, void* obj);

// Handler storing the payload ('src', 'len') into the std::string that 'obj'
// points to. Only the first payload is kept: if the destination already holds
// data, it is left untouched and false is returned.
static bool DataCopy(const uint8_t* src, size_t len, void* obj) {
  std::string* const out = static_cast<std::string*>(obj);
  if (!out->empty()) return false;
  out->assign(reinterpret_cast<const char*>(src), len);
  return true;
}

// State carried across successive XMPMerge() calls while the extended XMP
// payload, spread over several APP1 markers, is being reassembled.
// NOTE: the field order matters, the struct is aggregate-initialized.
struct XMPExt {
// Main XMP payload; searched for the xmpNote:HasExtendedXMP GUID.
std::string* xmp;
// Reassembly buffer, sized to the total announced by the first chunk.
std::string ext;
// 32-byte GUID of the first chunk seen; points into that chunk's data
// (not owned). nullptr until a chunk has been received.
const uint8_t* guid;
// Number of payload bytes copied into 'ext' so far.
size_t size;
// Set to true only once 'ext' is complete and its digest has been verified.
bool ok;
};

// Reads a 32-bit unsigned value stored in big-endian order in the four bytes
// starting at 'src'.
static uint32_t Get32b(const uint8_t* const src) {
  uint32_t value = 0;
  for (int i = 0; i < 4; ++i) {
    value = (value << 8) | static_cast<uint32_t>(src[i]);
  }
  return value;
}

// Handler for one ExtendedXMP APP1 payload (marker signature already removed).
//
// 'src' starts with a 40-byte header: a 32-byte GUID, the total size of the
// extended data (32 bits, big-endian) and the offset of this chunk (32 bits,
// big-endian). The chunk's bytes follow. 'obj' must point to an XMPExt.
//
// The chunk is copied at its offset into xmp->ext. Once 'total' bytes have been
// accumulated, the MD5 digest of the reassembled data is compared with the
// chunk GUID and with the 32 characters following xmpNote:HasExtendedXMP=" in
// the main XMP payload; xmp->ok becomes true only if both match.
//
// Returns false if the chunk is malformed or inconsistent with previous chunks
// (xmp->ok is then left false).
static bool XMPMerge(const uint8_t* src, size_t len, void* obj) {
  const size_t kGuidSize = 32;
  const size_t kChunkHeaderSize = kGuidSize + 4 + 4;  // GUID + total + offset

  XMPExt* const xmp = static_cast<XMPExt*>(obj);
  xmp->ok = false;  // only a complete, verified payload may set this back
  if (len < kChunkHeaderSize) return false;
  if (xmp->guid == nullptr) {
    xmp->guid = src + 0;  // first chunk: remember its GUID
  } else {
    // every chunk must carry the same GUID
    if (memcmp(src, xmp->guid, kGuidSize)) return false;
  }
  const size_t total = Get32b(src + 32);
  const size_t offset = Get32b(src + 36);
  if (xmp->xmp == nullptr) return false;
  if (xmp->ext.empty()) {
    xmp->ext.resize(total);
  } else {
    // check size mismatch
    if (xmp->ext.size() != total) return false;
  }
  src += kChunkHeaderSize;
  len -= kChunkHeaderSize;
  if (len > total) return false;
  // Bounds check written without 'offset + len': 'offset' comes straight from
  // the file and the sum could wrap around where size_t is 32 bits wide, which
  // would let the memcpy() below write outside of the buffer.
  const size_t ext_size = xmp->ext.size();
  if (offset > ext_size || len > ext_size - offset) return false;

  memcpy(&xmp->ext[offset], src, len);
  xmp->size += len;
  if (xmp->size > total) return false;  // some overlap
  if (xmp->size == total) {
    uint8_t guid[32];
    sjpeg::MD5Digest(xmp->ext).Get(guid);
    if (memcmp(guid, xmp->guid, kGuidSize)) return false;
    // find the xmpNote and GUID in the XMP chunk, verify size
    // (24 is the length of the attribute name up to the opening quote,
    //  +1 accounts for the closing quote after the 32-character GUID)
    const size_t note_pos = xmp->xmp->find("xmpNote:HasExtendedXMP=\"");
    if (note_pos == std::string::npos) return false;
    if (note_pos + 24 + 32 + 1 > xmp->xmp->size()) return false;
    // compare the main GUID in the XMP chunk
    const char* const main_guid = &(*xmp->xmp)[note_pos + 24];
    if (memcmp(main_guid, guid, kGuidSize)) return false;
    // all good
    xmp->ok = true;
  }
  return true;
}

// Returns true on success and false for memory errors and corrupt profiles.
// The caller must use MetadataFree() on 'metadata' in all cases.
static int ExtractMetadataFromJPEG(j_decompress_ptr dinfo,
static bool ExtractMetadataFromJPEG(j_decompress_ptr dinfo,
EncoderParam* const param) {
if (param == NULL) return true;
if (param == nullptr) return true;
param->ResetMetadata();
XMPExt xmp_ext = { &param->xmp, "", nullptr, 0, false };
const struct {
int marker;
const char* signature;
size_t signature_length;
std::string* data;
void* data;
Handler handler;
} metadata_map[] = {
// Exif 2.2 Section 4.7.2 Interoperability Structure of APP1 ...
{ JPEG_APP1, "Exif\0", 6, &param->exif },
{ JPEG_APP1, "Exif\0", 6, &param->exif, DataCopy },
// XMP Specification Part 3 Section 3 Embedding XMP Metadata ... #JPEG
// TODO(jzern) Add support for 'ExtendedXMP'
{ JPEG_APP1, "http://ns.adobe.com/xap/1.0/", 29, &param->xmp },
{ 0, NULL, 0, 0 },
{ JPEG_APP1, "http://ns.adobe.com/xap/1.0/", 29, &param->xmp, DataCopy },
// XMP Extended
{ JPEG_APP1, "http://ns.adobe.com/xmp/extension/", 35, &xmp_ext, XMPMerge },
// Fake ICC handler, even if ICC is treated separately. This is to prevent
// storing the ICC data as 'app_markers'.
{ JPEG_APP2, "ICC_PROFILE", 12, nullptr, nullptr },
};
jpeg_saved_marker_ptr marker;
// Treat ICC profiles separately as they may be segmented and out of order.
if (!StoreICCP(dinfo, &param->iccp)) return 0;
if (!StoreICCP(dinfo, &param->iccp)) return false;

for (marker = dinfo->marker_list; marker != NULL; marker = marker->next) {
int i;
for (i = 0; metadata_map[i].marker != 0; ++i) {
if (marker->marker == metadata_map[i].marker &&
marker->data_length > metadata_map[i].signature_length &&
!memcmp(marker->data, metadata_map[i].signature,
metadata_map[i].signature_length)) {
std::string* const payload = metadata_map[i].data;

if (payload->size() == 0) {
const char* marker_data =
reinterpret_cast<const char*>(marker->data) +
metadata_map[i].signature_length;
const size_t marker_data_length =
marker->data_length - metadata_map[i].signature_length;
payload->append(marker_data, marker_data_length);
} else {
fprintf(stderr, "Ignoring additional '%s' marker\n",
metadata_map[i].signature);
bool found = false;
for (const auto& m : metadata_map) {
if (marker->marker == m.marker &&
marker->data_length > m.signature_length &&
!memcmp(marker->data, m.signature, m.signature_length)) {
const uint8_t* const data = marker->data + m.signature_length;
const size_t data_length = marker->data_length - m.signature_length;
if (m.handler != nullptr && !m.handler(data, data_length, m.data)) {
fprintf(stderr, "Ignoring '%s' marker\n", m.signature);
}
found = true;
break;
}
}
}
return 1;
// append to app_markers
if (!found) {
char header[4];
header[0] = 0xff;
header[1] = marker->marker;
header[2] = ((marker->data_length + 2) >> 8) & 0xff;
header[3] = ((marker->data_length + 2) >> 0) & 0xff;
param->app_markers.append(header, sizeof(header));
param->app_markers.append(reinterpret_cast<const char*>(marker->data),
marker->data_length);
}
}
if (xmp_ext.ok) param->xmp += xmp_ext.ext;
return true;
}

#undef JPEG_APP1
Expand Down
3 changes: 3 additions & 0 deletions src/dichotomy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,9 @@ size_t Encoder::HeaderSize() const {
}
if (xmp_.size() > 0) {
size += 2 + 2 + 29 + xmp_.size();
if (xmp_.size() > 65533) { // XMPExtended
size += (xmp_.size() / 65458 + 1) * 40;
}
}
size += 2 * 65 + 2 + 2; // DQT
size += 8 + 3 * nb_comps_ + 2; // SOF
Expand Down
6 changes: 4 additions & 2 deletions src/enc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2102,8 +2102,9 @@ void EncoderParam::SetMinQuantization(const uint8_t m[2][64],
void EncoderParam::ResetMetadata() {
iccp.clear();
exif.clear();
xmp.clear();
app_markers.clear();
xmp.clear();
xmp_split_point = 0u;
}

bool Encoder::InitFromParam(const EncoderParam& param) {
Expand All @@ -2126,8 +2127,9 @@ bool Encoder::InitFromParam(const EncoderParam& param) {

SetMetadata(param.iccp, Encoder::ICC);
SetMetadata(param.exif, Encoder::EXIF);
SetMetadata(param.xmp, Encoder::XMP);
SetMetadata(param.app_markers, Encoder::MARKERS);
SetMetadata(param.xmp, Encoder::XMP);
xmp_split_ = param.xmp_split_point;

passes_ = (param.passes < 1) ? 1 : (param.passes > 20) ? 20 : param.passes;
if (passes_ > 1) {
Expand Down
105 changes: 80 additions & 25 deletions src/headers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,23 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <algorithm>

#include "sjpegi.h"
#include "md5sum.h"

namespace sjpeg {

// Emits the low 16 bits of 'size' to the bit-writer as two bytes, most
// significant byte first.
void Encoder::Put16b(uint32_t size) {
  const uint32_t high_byte = (size >> 8) & 0xff;
  const uint32_t low_byte = size & 0xff;
  bw_.PutByte(high_byte);
  bw_.PutByte(low_byte);
}

// Emits 'size' as four bytes, most significant byte first, by writing its
// upper then its lower 16-bit half.
void Encoder::Put32b(uint32_t size) {
  const uint32_t upper_half = size >> 16;
  const uint32_t lower_half = size & 0xffff;
  Put16b(upper_half);
  Put16b(lower_half);
}

////////////////////////////////////////////////////////////////////////////////
// Headers
//
Expand Down Expand Up @@ -65,10 +77,8 @@ bool Encoder::WriteEXIF(const std::string& data) {
if (data_size > 0xffff) return false;
ok_ = ok_ && bw_.Reserve(data_size + 2);
if (!ok_) return false;
bw_.PutByte(0xff);
bw_.PutByte(0xe1);
bw_.PutByte((data_size >> 8) & 0xff);
bw_.PutByte((data_size >> 0) & 0xff);
Put16b(0xffe1);
Put16b(data_size);
bw_.PutBytes(kEXIF, kEXIF_len);
bw_.PutBytes(reinterpret_cast<const uint8_t*>(data.data()), data.size());
return true;
Expand All @@ -80,19 +90,17 @@ bool Encoder::WriteICCP(const std::string& data) {
const uint8_t* ptr = reinterpret_cast<const uint8_t*>(data.data());
const uint8_t kICCP[] = "ICC_PROFILE";
const size_t kICCP_len = 12; // includes the \0
const size_t chunk_size_max = 0xffff - kICCP_len - 4;
size_t max_chunk = (data_size + chunk_size_max - 1) / chunk_size_max;
const size_t kMaxChunkSize = 0xffff - kICCP_len - 4;
size_t max_chunk = (data_size + kMaxChunkSize - 1) / kMaxChunkSize;
if (max_chunk >= 256) return false;
size_t seq = 1;
while (data_size > 0) {
size_t size = data_size;
if (size > chunk_size_max) size = chunk_size_max;
ok_ = ok_ && bw_.Reserve(size + kICCP_len + 4 + 2);
const size_t size = std::min(kMaxChunkSize, data_size);
const size_t total_size = size + kICCP_len + 4;
ok_ = ok_ && bw_.Reserve(total_size + 2);
if (!ok_) return false;
bw_.PutByte(0xff);
bw_.PutByte(0xe2);
bw_.PutByte(((size + kICCP_len + 4) >> 8) & 0xff);
bw_.PutByte(((size + kICCP_len + 4) >> 0) & 0xff);
Put16b(0xffe2);
Put16b(total_size);
bw_.PutBytes(kICCP, kICCP_len);
bw_.PutByte(seq & 0xff);
bw_.PutByte(max_chunk & 0xff);
Expand All @@ -104,21 +112,71 @@ bool Encoder::WriteICCP(const std::string& data) {
return true;
}

// Writes 'data' as one main XMP APP1 segment followed by as many ExtendedXMP
// APP1 segments as needed (XMP Specification Part 3, section 1.1.3.1).
//
// 'data' is cut at 'xmp_split_' if non-zero, or at 65503 bytes otherwise: the
// first part becomes the main chunk, the remainder the extended data. The main
// part must contain xmpNote:HasExtendedXMP="<32 chars>"; these 32 characters
// are overwritten by the MD5 digest of the extended data, which also serves as
// the GUID stored in each extended segment.
//
// Returns true without writing anything if 'data' is shorter than 65503 bytes
// (such data is expected to go through WriteXMP() as a single segment).
// Returns false if 'data' is too large, if the attribute is missing or
// ill-formed, if the split point does not fit in a single APP1 segment, or if
// the output buffer cannot be grown.
bool Encoder::WriteXMPExtended(const std::string& data) {
  const size_t kMainSize = 65503;
  // Largest payload WriteXMP() can store in a single segment:
  // 0xffff - 2 (length field) - 29 (main XMP signature, including the \0).
  const size_t kMaxMainSize = 0xffff - 2 - 29;
  if (data.size() < kMainSize) return true;  // too short! should be a main XMP
  if (data.size() > (1u << 31)) return false;  // too large
  size_t split = (xmp_split_ == 0) ? kMainSize : xmp_split_;
  split = std::min(split, data.size());
  // A main chunk larger than kMaxMainSize cannot be written as one segment:
  // WriteXMP() would hand it back to this function with the very same size,
  // and the two would call each other endlessly.
  if (split > kMaxMainSize) return false;
  // search for the extension tag
  // (24 is the length of the attribute name up to the opening quote)
  const size_t note_pos = data.find(std::string("xmpNote:HasExtendedXMP=\""));
  if (note_pos == std::string::npos) return false;   // no extension!
  if (note_pos + 24 + 32 + 1 > split) return false;  // ill-formed
  if (data[note_pos + 24 + 32] != '\"') return false;
  // split in main / extension
  std::string main_data(&data[0], split);
  std::string ext_data(&data[split], data.size() - split);
  // compute GUID: the digest is written in place, over the 32 characters
  // reserved for it in the main chunk
  uint8_t* const guid = reinterpret_cast<uint8_t*>(&main_data[note_pos + 24]);
  MD5Digest(ext_data).Get(guid);

  // Main chunk.
  if (!WriteXMP(main_data)) return false;

  // Extended chunks.
  const uint8_t kXMPExt[] = "http://ns.adobe.com/xmp/extension/";
  const size_t kXMPExt_size = sizeof(kXMPExt);  // includes the \0
  // With the headers below, a full chunk yields a segment length of 0xffff.
  const size_t kBufSize = 65458;

  // 40 = 32 bytes for GUID + 4 bytes for size + 4 bytes for position
  const size_t kHeaderSize = kXMPExt_size + 40;
  const size_t num_chunks = ext_data.size() / kBufSize + 1;
  // per chunk: 2 bytes of marker + 2 bytes of length + header, plus all data
  const size_t data_size = num_chunks * (kHeaderSize + 2 + 2) + ext_data.size();
  ok_ = ok_ && bw_.Reserve(data_size);
  if (!ok_) return false;
  size_t read_pos = 0, write_pos = 0;
  for (uint32_t chunk = 0; chunk < num_chunks; ++chunk) {
    const uint32_t write_size = std::min(kBufSize, ext_data.size() - read_pos);
    Put16b(0xffe1);  // APP1
    Put16b(2 + kHeaderSize + write_size);
    bw_.PutBytes(kXMPExt, kXMPExt_size);
    bw_.PutBytes(guid, 32u);
    Put32b(ext_data.size());  // total size, not chunk size!
    Put32b(read_pos);
    bw_.PutBytes(reinterpret_cast<const uint8_t*>(&ext_data[read_pos]),
                 write_size);
    read_pos += write_size;
    write_pos += 2 + (2 + kHeaderSize + write_size);
  }
  // sanity check: exactly the reserved amount must have been emitted
  return (write_pos == data_size);
}

bool Encoder::WriteXMP(const std::string& data) {
if (data.size() == 0) return true;
const uint8_t kXMP[] = "http://ns.adobe.com/xap/1.0/";
const size_t kXMP_size = 29;
const size_t kXMP_size = sizeof(kXMP);
const size_t data_size = 2 + data.size() + kXMP_size;
if (data_size > 0xffff) return false; // error
if (data_size <= 0xffff) { // don't use extended XMP if small enough data
ok_ = ok_ && bw_.Reserve(data_size + 2);
if (!ok_) return false;
bw_.PutByte(0xff);
bw_.PutByte(0xe1);
bw_.PutByte((data_size >> 8) & 0xff);
bw_.PutByte((data_size >> 0) & 0xff);
Put16b(0xffe1);
Put16b(data_size);
bw_.PutBytes(kXMP, kXMP_size);
bw_.PutBytes(reinterpret_cast<const uint8_t*>(data.data()), data.size());
return true;
}
// need to split into main-chunk + extended sections
return WriteXMPExtended(data);
}

void Encoder::WriteDQT() {
Expand Down Expand Up @@ -170,10 +228,8 @@ void Encoder::WriteDHT() {
assert(data_size <= 255);
ok_ = ok_ && bw_.Reserve(data_size + 2);
if (!ok_) return;
bw_.PutByte(0xff);
bw_.PutByte(0xc4);
bw_.PutByte(0x00 /*data_size >> 8*/);
bw_.PutByte(data_size);
Put16b(0xffc4);
Put16b(data_size);
bw_.PutByte((type << 4) | c);
bw_.PutBytes(h->bits_, 16);
bw_.PutBytes(h->syms_, h->nb_syms_);
Expand Down Expand Up @@ -208,8 +264,7 @@ void Encoder::WriteEOI() { // EOI
ok_ = ok_ && bw_.Reserve(2);
if (!ok_) return;
// append EOI
bw_.PutByte(0xff);
bw_.PutByte(0xd9);
Put16b(0xffd9);
}

////////////////////////////////////////////////////////////////////////////////
Expand Down
13 changes: 10 additions & 3 deletions src/sjpeg.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,15 +240,22 @@ struct EncoderParam {
// if null, a default implementation will be used
sjpeg::SearchHook* search_hook;

// metadata: extra EXIF/XMP/ICCP data that will be embedded in
// metadata: extra EXIF/XMP/XMPExt/ICCP data that will be embedded in
// APP1 or APP2 markers. They should contain only the raw payload and not
// the prefixes ("Exif\0", "ICC_PROFILE", etc...). These will be added
// automatically during encoding.
// automatically during encoding. If XMP data is larger than 65504 bytes,
// XMPExtended chunks will be used.
// It is the caller's responsibility to make sure that the xmp data contains
// the necessary extension syntax (xmpNote:HasExtendedXMP="...", with a
// 32-character placeholder as value) somewhere in the main chunk, i.e. the
// first 65503 bytes by default. The MD5 digest will be inserted at the correct
// location. If xmp_split_point is not 0, it will be used to split the xmp
// data chunk.
// Conversely, the content of app_markers is written as is, right after APP0.
std::string exif;
std::string xmp;
std::string iccp;
std::string app_markers;
std::string xmp;
uint16_t xmp_split_point = 0u; // user-supplied split point for extended XMP
void ResetMetadata(); // clears the above

// Memory manager used by the codec. If null, default one will be used.
Expand Down
Loading

0 comments on commit fc8138a

Please sign in to comment.