forked from MikeKovarik/exifr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathxmp.mjs
328 lines (277 loc) · 9.42 KB
/
xmp.mjs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
import {AppSegmentParserBase} from '../parser.mjs'
import {segmentParsers} from '../plugins.mjs'
import {undefinedIfEmpty} from '../util/helpers.mjs'
import {BufferView} from '../util/BufferView.mjs'
// Common prefix of both XMP header strings below; Xmp.canHandle() compares segment content against it.
const XMP_CORE_HEADER = 'http://ns.adobe.com/'
// Header string of the main XMP segment; its length is used by Xmp.headerLength().
const XMP_MAIN_HEADER = 'http://ns.adobe.com/xap/1.0/'
// Header string of an XMP Extended segment; Xmp.headerLength() uses it to tell the two segment kinds apart.
const XMP_EXTENDED_HEADER = 'http://ns.adobe.com/xmp/extension/'
// 2 bytes for markers + 2 bytes length
const TIFF_HEADER_LENGTH = 4
// Each XMP Extended segment starts with guid, length and offset (of its own, not just the one in TIFF header).
// Xmp.headerLength() returns this value for extended segments and Xmp.findPosition() uses it to recognize them.
const XMP_EXTENDED_DATA_OFFSET = 79
/**
 * Segment parser for XMP metadata.
 * Recognizes both the main XMP segment and XMP Extended segments, joins their
 * content into a single string and (optionally) parses it into namespace objects.
 */
export default class Xmp extends AppSegmentParserBase {

	static type = 'xmp'
	static multiSegment = true

	/**
	 * Tells whether the segment starting at `offset` contains XMP.
	 * @param {object} chunk - buffer view exposing getUint8(), getUint32() and getString()
	 * @param {number} offset - position of the segment within `chunk`
	 * @returns {boolean}
	 */
	static canHandle(chunk, offset) {
		return chunk.getUint8(offset + 1) === 0xE1
			&& chunk.getUint32(offset + 4) === 0x68747470 // 'http'
			&& chunk.getString(offset + 4, XMP_CORE_HEADER.length) === XMP_CORE_HEADER
	}

	/**
	 * Returns the length of the header preceding the XMP data of the segment at `offset`.
	 * @param {object} chunk - buffer view exposing getString()
	 * @param {number} offset - position of the segment within `chunk`
	 * @returns {number} XMP_EXTENDED_DATA_OFFSET for extended segments, otherwise the main header length
	 */
	static headerLength(chunk, offset) {
		let headerString = chunk.getString(offset + 4, XMP_EXTENDED_HEADER.length)
		if (headerString === XMP_EXTENDED_HEADER)
			return XMP_EXTENDED_DATA_OFFSET
		else
			return TIFF_HEADER_LENGTH + XMP_MAIN_HEADER.length + 1 // 1 for null termination between header and data
	}

	/**
	 * Extends the segment description produced by the parent class with
	 * `multiSegment`, `extended`, `chunkCount` and `chunkNumber`.
	 * @param {object} chunk - buffer view exposing getUint8()
	 * @param {number} offset - position of the segment within `chunk`
	 * @returns {object} the segment description
	 */
	static findPosition(chunk, offset) {
		let seg = super.findPosition(chunk, offset)
		// first is the main XMP, then the extended starts counting from 0.
		// We could determine that the XMP has extension if we looked for 'HasExtendedXMP'
		// but we don't want to read the segment here just yet.
		seg.multiSegment = seg.extended = seg.headerLength === XMP_EXTENDED_DATA_OFFSET
		if (seg.multiSegment) {
			seg.chunkCount = chunk.getUint8(offset + 72)
			seg.chunkNumber = chunk.getUint8(offset + 76)
			// first and second chunk both have 0 as the chunk number.
			// the true first chunk (the one with <x:xmpme) has zeroes in the last two bytes of the chunk header.
			if (chunk.getUint8(offset + 77) !== 0) seg.chunkNumber++
		} else {
			// The main XMP segment is not numbered and we can't determine if there are any XMP Extended chunks
			// without parsing and looking for 'HasExtendedXMP'. We don't want to read it here because of
			// 1) side effects and 2) the chunked reader. For now we can only tell
			// "there's a possibility of more chunks".
			seg.chunkCount = Infinity
			seg.chunkNumber = -1
		}
		return seg
	}

	/**
	 * Concatenates the string content of all given segments, in the order received.
	 * @param {object[]} allSegments - segments, each with a `chunk` exposing getString()
	 * @returns {string}
	 */
	static handleMultiSegments(allSegments) {
		return allSegments.map(seg => seg.chunk.getString()).join('')
	}

	// WARNING: XMP as IFD0 tag in TIFF can be either Uint8Array or string.
	// We need to be ready to accept any input data and turn it into string.
	normalizeInput(input) {
		return typeof input === 'string'
			? input
			: BufferView.from(input).getString()
	}

	/**
	 * Parses the XMP string into an object of namespace objects.
	 * When `localOptions.parse` is falsy the string is returned untouched.
	 *
	 * warning: The content may or may not be wrapped into <?xpacket.
	 * So far i've seen all of these:
	 * '<?xpacket><x:xmpmeta><rdf:RDF>'
	 * '<?xpacket><rdf:RDF>'
	 * '<x:xmpmeta><rdf:RDF>'
	 *
	 * @param {string} [xmpString=this.chunk]
	 * @returns {object|string|undefined} the result of pruneObject() on the collected namespaces,
	 *   or the raw string when parsing is disabled
	 */
	parse(xmpString = this.chunk) {
		if (!this.localOptions.parse)
			return xmpString
		xmpString = idNestedTags(xmpString)
		let tags = XmlTag.findAll(xmpString, 'rdf', 'Description')
		// no rdf:Description wrapper found: treat the whole string as the content of one
		if (tags.length === 0)
			tags.push(new XmlTag('rdf', 'Description', undefined, xmpString))
		let xmp = {}
		for (let tag of tags) {
			for (let prop of tag.properties) {
				let namespace = getNamespace(prop.ns, xmp)
				assignToObject(prop, namespace)
			}
		}
		return pruneObject(xmp)
	}

	/**
	 * Writes the result of parse() into the output object `root`.
	 * @param {object} root - output object
	 * @param {object|string|undefined} xmp - value returned by parse()
	 */
	assignToOutput(root, xmp) {
		if (!this.localOptions.parse) {
			// xmp is not parsed, we include the string into output as is
			root.xmp = xmp
			return
		}
		// parse() returns whatever pruneObject() yields, which is presumably undefined when
		// nothing usable was found. Object.entries(undefined) would throw a TypeError.
		if (xmp === undefined || xmp === null)
			return
		// properties are grouped into separate namespace objects
		// XMP TIFF namespace is merged into IFD0 block of TIFF segment
		// XMP EXIF namespace is merged into EXIF block of TIFF segment
		// All other namespaces are assigned under their own name
		for (let [ns, nsObject] of Object.entries(xmp)) {
			switch (ns) {
				case 'tiff':
					this.assignObjectToOutput(root, 'ifd0', nsObject)
					break
				case 'exif':
					this.assignObjectToOutput(root, 'exif', nsObject)
					break
				case 'xmlns':
					// XMLNS attributes aren't links but namespace identifiers in the URI form.
					// They carry no metadata of their own, so they are left out of the output.
					break
				default:
					this.assignObjectToOutput(root, ns, nsObject)
					break
			}
		}
	}

}
/**
 * Removes undefined properties and empty objects.
 * Every property is passed through undefinedIfEmpty(); properties that come back
 * undefined are deleted from `object` (which is modified in place), the rest keep
 * the returned value.
 * @param {object} object
 * @returns {*} undefinedIfEmpty(object) after the clean-up
 */
function pruneObject(object) {
	for (const key in object) {
		const pruned = undefinedIfEmpty(object[key])
		if (pruned === undefined) {
			delete object[key]
		} else {
			object[key] = pruned
		}
	}
	return undefinedIfEmpty(object)
}
// Register the Xmp class in segmentParsers under the 'xmp' key.
segmentParsers.set('xmp', Xmp)
// ----- ATTRIBUTES -----
/**
 * A single namespaced XML attribute (`ns:name="value"`).
 */
export class XmlAttr {

	/**
	 * Finds all `ns:name="value"` / `ns:name='value'` attributes in a string.
	 * @param {string} [string] - attribute part of a tag
	 * @returns {XmlAttr[]}
	 */
	static findAll(string) {
		// NOTE: regex has to be recreated each time because it's stateful due to use in exec()
		// `\w` (rather than [a-zA-Z0-9]) so that underscores are accepted, same as in the tag regex of XmlTag.
		// Without it `apple_desktop:solar="..."` was read as namespace `desktop`
		// and `ns:my_attr="..."` was not found at all.
		let regex = /([\w-]+):([\w-]+)=("[^"]*"|'[^']*')/gm
		return matchAll(string, regex).map(XmlAttr.unpackMatch)
	}

	/**
	 * Turns a regex match produced by findAll() into an XmlAttr.
	 * @param {Array} match - [whole, ns, name, quoted value]
	 * @returns {XmlAttr}
	 */
	static unpackMatch(match) {
		let ns = match[1]
		let name = match[2]
		// strip the surrounding quotes
		let value = match[3].slice(1, -1)
		value = normalizeValue(value)
		return new XmlAttr(ns, name, value)
	}

	constructor(ns, name, value) {
		this.ns = ns
		this.name = name
		this.value = value
	}

	/**
	 * @returns {*} the attribute value
	 */
	serialize() {
		return this.value
	}

}
// ----- TAGS -----
// Regex source matching one part (namespace or name) of a `ns:name` tag.
// XmlTag.findAll() substitutes it for whichever of ns/name was not specified.
const tagNamePartRegex = '[\\w\\d-]+'
// Key under which XmlTag.serialize() stores the tag's own value when the tag is serialized as an object.
const VALUE_PROP = 'value'
/**
 * A namespaced XML tag (`<ns:name ...>...</ns:name>` or `<ns:name .../>`) together with
 * its attributes and child tags, which are looked up eagerly in the constructor.
 */
export class XmlTag {

	/**
	 * Finds tags in `xmpString`. Handles both pair and self-closing tags.
	 * @param {string} [xmpString]
	 * @param {string} [ns] - namespace to look for; any namespace when omitted
	 * @param {string} [name] - tag name to look for; any name when omitted
	 * @returns {XmlTag[]}
	 */
	static findAll(xmpString, ns, name) {
		// NOTE: regex has to be recreated each time because it's stateful due to use in exec()
		const nsPattern = ns || tagNamePartRegex
		const namePattern = name || tagNamePartRegex
		const tagRegex = new RegExp(`<(${nsPattern}):(${namePattern})(#\\d+)?((\\s+?[\\w\\d-:]+=("[^"]*"|'[^']*'))*\\s*)(\\/>|>([\\s\\S]*?)<\\/\\1:\\2\\3>)`, 'gm')
		return matchAll(xmpString, tagRegex).map(XmlTag.unpackMatch)
	}

	/**
	 * Turns a regex match produced by findAll() into an XmlTag.
	 * Groups used: 1 = namespace, 2 = name, 4 = attribute string, 8 = inner XML.
	 */
	static unpackMatch(match) {
		return new XmlTag(match[1], match[2], match[4], match[8])
	}

	constructor(ns, name, attrString, innerXml) {
		this.ns = ns
		this.name = name
		this.attrString = attrString
		this.innerXml = innerXml
		this.attrs = XmlAttr.findAll(attrString)
		this.children = XmlTag.findAll(innerXml)
		// only a tag without child tags has a value of its own
		const hasChildren = this.children.length !== 0
		this.value = hasChildren ? undefined : normalizeValue(innerXml)
		this.properties = [...this.attrs, ...this.children]
	}

	// true when the tag is nothing but a value: no attributes, no children
	get isPrimitive() {
		if (this.value === undefined) return false
		if (this.attrs.length !== 0) return false
		return this.children.length === 0
	}

	// true when the only child is a list tag: <ns:tag><rdf:Seq>...</rdf:Seq></ns:tag>
	get isListContainer() {
		const {children} = this
		return children.length === 1
			&& children[0].isList
	}

	// true for <rdf:Seq>, <rdf:Bag> and <rdf:Alt>
	get isList() {
		if (this.ns !== 'rdf') return false
		switch (this.name) {
			case 'Seq':
			case 'Bag':
			case 'Alt':
				return true
			default:
				return false
		}
	}

	// true for <rdf:li>
	get isListItem() {
		return this.name === 'li' && this.ns === 'rdf'
	}

	/**
	 * Converts the tag into a plain value: a primitive, an array (list tags) or an object
	 * with one key per attribute/child plus VALUE_PROP for the tag's own value.
	 * @returns {*} undefined when there is nothing to output
	 */
	serialize() {
		const {properties, children, attrs, value} = this
		// invalid and undefined
		if (properties.length === 0 && value === undefined) return undefined
		// primitive property
		if (this.isPrimitive) return value
		// tag containing list tag <ns:tag><rdf:Seq>...</rdf:Seq></ns:tag>
		if (this.isListContainer) return children[0].serialize()
		// list tag itself <rdf:Seq>...</rdf:Seq>
		if (this.isList) return unwrapArray(children.map(serialize))
		// sometimes <rdf:li> may have a single object-tag child. We need that object returned.
		const wrapsSingleChild = this.isListItem && children.length === 1 && attrs.length === 0
		if (wrapsSingleChild) return children[0].serialize()
		// process attributes and children tags into object
		const output = {}
		properties.forEach(prop => {
			assignToObject(prop, output)
		})
		if (value !== undefined) {
			output[VALUE_PROP] = value
		}
		return undefinedIfEmpty(output)
	}

}
// ----- UTILS -----
/**
 * Serializes `prop` and stores the result on `target` under `prop.name`.
 * Nothing is stored when the property serializes to undefined.
 * @param {{name: string, serialize: function(): *}} prop - tag or attribute
 * @param {object} target - object receiving the value
 */
function assignToObject(prop, target) {
	let serialized = prop.serialize()
	if (serialized === undefined) return
	// Names come from the (untrusted) XMP document. Assigning to `__proto__` would not create
	// a property but swap the prototype of `target` for document-controlled data, so skip it.
	if (prop.name === '__proto__') return
	target[prop.name] = serialized
}
// Calls serialize() on the given tag or attribute. Standalone so it can be passed to map().
function serialize(prop) {
	return prop.serialize()
}
// Returns the sole item of a single-item array; any other array is returned as is.
function unwrapArray(array) {
	if (array.length === 1) return array[0]
	return array
}
/**
 * Returns the container object of namespace `ns` stored on `root`, creating it on first use.
 *
 * `ns` comes from the (untrusted) XMP document. A plain `root[ns]` lookup also resolves
 * inherited members, so a prefix like `__proto__` or `constructor` used to hand out
 * Object.prototype / Object itself and the caller then wrote document-controlled
 * properties onto it. Only own properties of `root` are reused now; prefixes colliding
 * with inherited members get a detached container that never reaches `root`.
 *
 * @param {string} ns - namespace prefix
 * @param {object} root - object holding one container per namespace
 * @returns {object} container to assign the namespace's properties to
 */
const getNamespace = (ns, root) => {
	if (Object.prototype.hasOwnProperty.call(root, ns))
		return root[ns] ? root[ns] : root[ns] = {}
	// not own but still resolvable => inherited member, must not be touched
	if (ns in root)
		return {}
	return root[ns] = {}
}
/**
 * Collects every match of `regex` in `string` by calling regex.exec() until it returns null.
 * @param {string} [string] - a falsy value yields an empty array
 * @param {RegExp} regex - advanced through exec(), hence a fresh instance per call is expected
 * @returns {Array} the match arrays returned by exec(), in order of occurrence
 */
function matchAll(string, regex) {
	const found = []
	if (string) {
		for (let hit = regex.exec(string); hit !== null; hit = regex.exec(string)) {
			found.push(hit)
		}
	}
	return found
}
/**
 * Converts a raw string taken from XMP into a JS value.
 * - values rejected by isUndefinable() become undefined
 * - anything Number() can convert becomes a number
 * - 'true' / 'false' (case-insensitive, compared untrimmed) become booleans
 * - everything else is returned as a trimmed string
 * @param {string} [value]
 * @returns {number|boolean|string|undefined}
 */
export function normalizeValue(value) {
	if (isUndefinable(value)) return undefined
	const numeric = Number(value)
	if (!Number.isNaN(numeric)) return numeric
	switch (value.toLowerCase()) {
		case 'true':
			return true
		case 'false':
			return false
		default:
			return value.trim()
	}
}
/**
 * Tells whether a raw value carries no information: null, undefined,
 * the strings 'null' / 'undefined', or a string that is empty once trimmed.
 * @param {string|null|undefined} value
 * @returns {boolean}
 */
function isUndefinable(value) {
	if (value === null || value === undefined) return true
	if (value === 'null' || value === 'undefined') return true
	// covers the empty string as well
	return value.trim() === ''
}
const identifiableTags = [
	// Basic lists and items
	'rdf:li', 'rdf:Seq', 'rdf:Bag', 'rdf:Alt',
	// This is special case when list items can immediately contain nested rdf:Description
	// e.g. <rdf:Bag><rdf:li><rdf:Description mwg-rs:Name="additional data"><... actual inner tag ...></rdf:Description></rdf:li></rdf:Bag>
	'rdf:Description'
]

// Matches the beginning of an opening (`<rdf:li`) or closing (`/rdf:li`) identifiable tag.
const nestedLiRegex = new RegExp(`(<|\\/)(${identifiableTags.join('|')})`, 'g')

/**
 * Tells whether the tag whose name ends right before index `from` is self-closing (`.../>`).
 * Walks to the `>` ending the tag; a `>` inside a quoted attribute value is skipped.
 * The walk gives up at the next `<`, which keeps the overall cost linear even for
 * unterminated tags.
 * @param {string} xmpString
 * @param {number} from - index of the first character after the tag name
 * @returns {boolean}
 */
function isSelfClosingTag(xmpString, from) {
	let quote = ''
	for (let i = from; i < xmpString.length; i++) {
		const char = xmpString[i]
		if (char === '<') return false
		if (quote !== '') {
			if (char === quote) quote = ''
		} else if (char === '"' || char === "'") {
			quote = char
		} else if (char === '>') {
			return xmpString[i - 1] === '/'
		}
	}
	return false
}

/**
 * Appends `#<id>` to the name of every identifiable tag so that an opening tag and
 * its closing tag share the same id, e.g. `<rdf:li#3>...</rdf:li#3>`.
 * This lets the (non-recursive) tag regex pair up nested tags of the same name.
 *
 * Self-closing tags get an id too but are not expected to be closed later. Pushing
 * them on the stack (as was done before) handed their id to the closing tag of the
 * enclosing same-name tag, which then no longer matched its opening tag.
 * A closing tag without any open counterpart is left as it is.
 *
 * @param {string} xmpString
 * @returns {string} the string with ids added
 */
export function idNestedTags(xmpString) {
	let stacks = {}
	let counts = {}
	for (let tag of identifiableTags) {
		stacks[tag] = []
		counts[tag] = 0
	}
	return xmpString.replace(nestedLiRegex, (match, prevChar, tag, offset) => {
		if (prevChar === '<') {
			let id = ++counts[tag]
			if (!isSelfClosingTag(xmpString, offset + match.length))
				stacks[tag].push(id)
			return `${match}#${id}`
		}
		let id = stacks[tag].pop()
		return id === undefined ? match : `${match}#${id}`
	})
}