-
Notifications
You must be signed in to change notification settings - Fork 7
/
index.js
405 lines (330 loc) · 12.5 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
const { Transform } = require('stream');
const Iconv = require('iconv-lite');
/**
 * Transform stream that parses raw DBF bytes into record objects.
 *
 * Emits a 'header' event once the DBF header has been parsed, then pushes
 * one plain object per record (readable side is in object mode). Supports
 * pagination via the `offset`/`size` options and optional inclusion of
 * soft-deleted records via `deleted`. Field values are decoded with the
 * iconv-lite `encoding` option (default 'utf-8').
 */
class YADBF extends Transform {
  constructor(options = {}) {
    super({ readableObjectMode: true });
    // create an empty buffer to simplify logic later
    this.unconsumedBytes = Buffer.alloc(0);
    this.offset = validateOffset(options.offset);
    this.size = validateSize(options.size);
    this.includeDeletedRecords = validateDeleted(options.deleted);
    this.encoding = validateEncoding(options.encoding);
    // keep track of how many records have been made readable (used for end-of-stream detection)
    this.totalRecordCount = 0;
    // keep track of how many records *could* have been pushed (used for pagination)
    this.eligibleRecordCount = 0;
  }
  _final(callback) {
    // the input ended before a complete header could be read
    if (!this.header) {
      const numberOfBytes = this.unconsumedBytes ? this.unconsumedBytes.length : 0;
      // stream.destroy() expects an Error instance, not a bare string
      this.destroy(new Error(`Unable to parse first 32 bytes from header, found ${numberOfBytes} byte(s)`));
    }
    return callback();
  }
  _transform(chunk, encoding, callback) {
    // append the chunk to unconsumed bytes for easier bookkeeping
    this.unconsumedBytes = Buffer.concat( [this.unconsumedBytes, chunk] );
    // if the header hasn't been parsed yet, do so now and emit it
    if (!this.header) {
      // if there aren't enough bytes to read the header, save off the accumulated
      // bytes for later use and return
      if (!hasEnoughBytesForHeader(this.unconsumedBytes)) {
        return callback();
      }
      // otherwise, attempt to parse the header
      try {
        this.header = parseHeader(this.unconsumedBytes);
        // emit the header for outside consumption
        this.emit('header', this.header);
        // remove the header bytes from the beginning of the chunk (for easier bookkeeping)
        this.unconsumedBytes = this.unconsumedBytes.slice(this.header.numberOfHeaderBytes);
      } catch (err) {
        this.destroy(err);
        return callback();
      }
    }
    // process records from the unconsumed bytes
    while (hasEnoughBytesForRecord(this.unconsumedBytes, this.header) && moreRecordsAreExpected.call(this)) {
      // get enough bytes for the record
      const recordSizedChunk = this.unconsumedBytes.slice(0, this.header.numberOfBytesInRecord);
      try {
        const record = convertToRecord(recordSizedChunk, this.header, this.encoding);
        // only push if it's eligible for output and within the pagination params
        if (isEligibleForOutput(record, this.includeDeletedRecords)) {
          if (isWithinPage(this.eligibleRecordCount, this.offset, this.size)) {
            this.push(record);
          }
          // increment total # of records pushed for pagination check
          this.eligibleRecordCount += 1;
        }
        // increment total # of records consumed for end-of-stream check
        this.totalRecordCount += 1;
        // remove the slice from the unconsumed bytes
        this.unconsumedBytes = this.unconsumedBytes.slice(recordSizedChunk.length);
      } catch (err) {
        this.destroy(err);
        return callback();
      }
    }
    // if all the records have been emitted, proceed with shutdown
    if (allRecordsHaveBeenProcessed(this.header.numberOfRecords, this.totalRecordCount) &&
        aSingleByteRemains(this.unconsumedBytes)) {
      // error out if the last byte isn't the expected EOF marker; return here
      // so we don't fall through and push(null) on an already-destroyed stream
      if (!firstByteIsEOFMarker(this.unconsumedBytes)) {
        this.destroy(new Error('Last byte of file is not end-of-file marker'));
        return callback();
      }
      // otherwise clear up unconsumedBytes and signal end-of-stream
      delete this.unconsumedBytes;
      this.push(null);
    }
    callback();
  }
}
// see: https://github.com/infused/dbf/blob/master/lib/dbf/table.rb
// DBF version bytes this parser is willing to process
const supportedVersions = new Set([
  0x03, // FoxBASE+/Dbase III plus, no memo
  0x83, // FoxBASE+/dBASE III PLUS, with memo
  0xF5, // FoxPro 2.x (or earlier) with memo
  0x8B, // dBASE IV with memo
  0x8E  // ?
]);
// single-character field type designators understood by typeHandlers
const supportedFieldTypes = new Set('CDFLMN');
// characters that encode true/false for L (logical) fields
const truthyValues = new Set('YyTt');
const falseyValues = new Set('NnFf');
// valid M-type value regex (10 digits or 10 spaces)
const validMTypeValueRegex = /^(\d{10}| {10})$/;
// type handlers keyed by the single character type designator; each takes the
// decoded string value of a field and coerces it to a JS value
const typeHandlers = {
  // D: 8 characters, YYYYMMDD -> Date (parsed via an ISO "YYYY-MM-DD" string)
  D: (value) => new Date(`${value.slice(0, 4)}-${value.slice(4, 6)}-${value.slice(6, 8)}`),
  // L: single character logical; '?' or ' ' means unknown and yields undefined
  L: (value) => {
    if (truthyValues.has(value)) {
      return true;
    }
    if (falseyValues.has(value)) {
      return false;
    }
    if (value !== '?' && value !== ' ') {
      throw new Error(`Invalid L-type field value: ${value}`);
    }
  },
  // F and N: numeric text -> float
  F: (value) => parseFloat(value),
  N: (value) => parseFloat(value),
  // C: character data with trailing NULs/spaces stripped
  C: (value) => value.replace(/[\u0000 ]+$/, ''),
  // M: 10-byte memo pointer, must be all digits or all spaces
  M: (value) => {
    if (!validMTypeValueRegex.test(value)) {
      throw new Error(`Invalid M-type field value: '${value}'`);
    }
    return value;
  }
};
// returns true if enough bytes have been read to parse the entire header
function hasEnoughBytesForHeader(chunk) {
  // the fixed header prefix is 32 bytes; below that we can't even read the length
  if (chunk.length < 32) {
    return false;
  }
  // bytes 8-9 declare the full header length (prefix + field descriptors + terminator)
  const declaredHeaderLength = chunk.readUInt16LE(8);
  return chunk.length >= declaredHeaderLength;
}
// returns true if enough bytes have been read to parse a record
function hasEnoughBytesForRecord(chunk, header) {
  const bytesNeeded = header.numberOfBytesInRecord;
  return chunk.length >= bytesNeeded;
}
// returns true if the number of processed records is less than the number of declared records
// NOTE: must be invoked with `this` bound to a YADBF instance
function moreRecordsAreExpected() {
  const { totalRecordCount, header } = this;
  return totalRecordCount < header.numberOfRecords;
}
// returns true if record is not deleted or deleted records should be included
function isEligibleForOutput(record, includeDeletedRecords) {
  if (includeDeletedRecords) {
    return true;
  }
  return !record['@meta'].deleted;
}
// returns true if count is within the page size/offset combination
function isWithinPage(count, offset, size) {
  const beforePage = count < offset;
  const afterPage = count >= offset + size;
  return !beforePage && !afterPage;
}
// returns true if the actual number of records processed matches the number of expected records
function allRecordsHaveBeenProcessed(expectedNumberOfRecords, numberOfRecordsProcessed) {
  return numberOfRecordsProcessed === expectedNumberOfRecords;
}
// returns true if there's exactly 1 byte in a buffer
function aSingleByteRemains(remaining) {
  return remaining.length === 1;
}
// returns true if the first byte of a buffer is the DBF end-of-file marker (0x1A)
function firstByteIsEOFMarker(unconsumedBytes) {
  const EOF_MARKER = 0x1A;
  return unconsumedBytes.readUInt8(0) === EOF_MARKER;
}
// convert the buffer to a header object: validates the version byte, declared
// header length, field-descriptor terminator, encryption flag, and MDX flag,
// then reads the fixed header fields and parses each 32-byte field descriptor.
// Throws on any violation.
function parseHeader(buffer) {
  const versionByte = buffer.readUInt8(0);
  if (!supportedVersions.has(versionByte)) {
    throw new Error(`Unsupported version: ${versionByte}`);
  }
  // bytes 8-9: total header size (32-byte prefix + field descriptors + terminator)
  const numberOfHeaderBytes = buffer.readUInt16LE(8);
  // the number of header bytes should be 1 when modded with 32
  // (32-byte prefix + N*32-byte descriptors + 1 terminator byte)
  if (numberOfHeaderBytes % 32 !== 1) {
    throw new Error(`Invalid number of header bytes: ${numberOfHeaderBytes}`);
  }
  // there are 32 bytes per header field + 1 byte for terminator + 32 bytes for the initial header
  const numberOfFields = (numberOfHeaderBytes-32-1)/32;
  const fieldBytes = buffer.slice(32, numberOfHeaderBytes);
  // emit an error if the header bytes does not end with 0x0D (per spec)
  if (fieldBytes.readUInt8(numberOfHeaderBytes-32-1) !== 0x0D) {
    throw new Error(`Invalid field descriptor array terminator at byte ${numberOfHeaderBytes}`);
  }
  const encryptionByte = buffer.readUInt8(15);
  // if the source is encrypted, then emit an error
  if (encryptionByte === 1) {
    throw new Error('Encryption flag is set, cannot process');
  }
  // valid values for the encryption byte are 0x00 and 0x01, emit an error otherwise
  if (encryptionByte > 1) {
    throw new Error(`Invalid encryption flag value: ${encryptionByte}`);
  }
  const hasProductionMDXFile = buffer.readUInt8(28);
  // production MDX file existence flag must be 0x00 or 0x01; anything larger is invalid
  if (hasProductionMDXFile > 1) {
    throw new Error(`Invalid production MDX file existence value: ${hasProductionMDXFile}`);
  }
  // construct and return the header
  const header = {
    version: versionByte,
    // bytes 1-3: date of last update as YY MM DD, where YY is an offset from 1900
    dateOfLastUpdate: new Date(
      1900 + buffer.readUInt8(1),
      buffer.readUInt8(2) - 1, // JS Date months are 0-indexed
      buffer.readUInt8(3)
    ),
    numberOfRecords: buffer.readInt32LE(4),
    numberOfHeaderBytes: numberOfHeaderBytes,
    numberOfBytesInRecord: buffer.readInt16LE(10),
    hasProductionMDXFile: hasProductionMDXFile,
    // NOTE(review): property name is misspelled ("langauge") but it is part of the
    // emitted header shape that consumers may rely on, so it is left unchanged
    langaugeDriverId: buffer.readUInt8(29),
    fields: Array.from( {length: numberOfFields }, parseHeaderField.bind(null, fieldBytes))
  };
  // if there are any duplicate field names, throw an error
  header.fields.reduce((allFieldNames, field) => {
    if (allFieldNames.has(field.name)) {
      throw new Error(`Duplicate field name '${field.name}'`);
    }
    return allFieldNames.add(field.name);
  }, new Set());
  return header;
}
// parses up 32 bytes from `fieldBytes` into a valid field definition; invoked
// as an Array.from callback (value is unused, `index` selects the descriptor)
function parseHeaderField(fieldBytes, _unused, index) {
  const descriptor = fieldBytes.slice(index * 32, index * 32 + 32);
  // byte 16: declared field length (255 is not allowed)
  const length = descriptor.readUInt8(16);
  if (length === 255) {
    throw new Error('Field length must be less than 255');
  }
  // byte 11: single-character type designator
  const type = descriptor.toString('utf-8', 11, 12);
  if (!supportedFieldTypes.has(type)) {
    throw new Error(`Field type must be one of: ${Array.from(supportedFieldTypes).join(', ')}`);
  }
  // some types mandate an exact length: D=8, L=1, M=10
  const fixedLengths = { D: [8, 'D (date)'], L: [1, 'L (logical)'], M: [10, 'M (memo)'] };
  if (type in fixedLengths) {
    const [expectedLength, label] = fixedLengths[type];
    if (length !== expectedLength) {
      throw new Error(`Invalid ${label} field length: ${length}`);
    }
  }
  // byte 31: indexed-in-production-MDX flag, only 0 or 1 are valid
  const isIndexedInMDXFile = descriptor.readUInt8(31);
  if (isIndexedInMDXFile > 1) {
    throw new Error(`Invalid indexed in production MDX file value: ${isIndexedInMDXFile}`);
  }
  // return an object representing the field definition
  return {
    name: descriptor.toString('utf-8', 0, 10).replace(/\0/g, ''),
    type,
    length,
    precision: descriptor.readUInt8(17),
    workAreaId: descriptor.readUInt16LE(18),
    isIndexedInMDXFile: isIndexedInMDXFile === 1
  };
}
// converts a record-sized chunk into an object based on the metadata available in `header`
function convertToRecord(chunk, header, encoding) {
  // byte 0 of the record is the deleted flag; field data starts at byte 1
  let cursor = 1;
  const record = {
    '@meta': {
      deleted: isDeleted(chunk)
    }
  };
  for (const field of header.fields) {
    const rawBytes = chunk.slice(cursor, cursor + field.length);
    // decode with the configured encoding, then coerce per the field's type
    record[field.name] = typeHandlers[field.type](Iconv.decode(rawBytes, encoding));
    cursor += field.length;
  }
  return record;
}
// determines if the first byte of a chunk is a valid deleted flag, or throws an error otherwise
// ' ' (0x20) means the record is live, '*' (0x2A) means it was soft-deleted
function isDeleted(chunk) {
  // readUInt8 takes only an offset; the former second (noAssert) argument
  // was deprecated and removed from the Node Buffer API
  const firstByte = chunk.readUInt8(0);
  if (firstByte === 0x20) { // ' '
    return false;
  }
  if (firstByte === 0x2A) { // '*'
    return true;
  }
  throw new Error(`Invalid deleted record value: ${String.fromCharCode(firstByte)}`);
}
// validates that `offset` is a non-negative integer, defaulting to `0` if not supplied
function validateOffset(offset) {
  // no offset supplied: start paging from the first eligible record
  if (offset === undefined) {
    return 0;
  }
  const isValid = Number.isInteger(offset) && offset >= 0;
  if (!isValid) {
    throw new Error('offset must be a non-negative integer');
  }
  return offset;
}
// validates that `size` is a non-negative integer, defaulting to `Infinity` if not supplied
function validateSize(size) {
  // no size supplied: emit every eligible record
  if (size === undefined) {
    return Infinity;
  }
  const isValid = Number.isInteger(size) && size >= 0;
  if (!isValid) {
    throw new Error('size must be a non-negative integer');
  }
  return size;
}
// validates that `deleted` is a boolean, defaulting to `false` if not supplied
function validateDeleted(deleted) {
  // not supplied: deleted records are excluded by default
  if (deleted === undefined) {
    return false;
  }
  if (typeof deleted !== 'boolean') {
    throw new Error('deleted must be a boolean');
  }
  return deleted;
}
// validates that `encoding` is recognized by iconv-lite, defaulting to 'utf-8' if not supplied
function validateEncoding(encoding) {
  if (encoding === undefined) {
    return 'utf-8';
  }
  if (Iconv.encodingExists(encoding)) {
    return encoding;
  }
  throw new Error(`encoding not recognized: '${encoding}'`);
}
// the Transform stream class is the module's sole public export
module.exports = YADBF;