Skip to content

Commit 5ac838e

Browse files
authored
V2.2.0 (#157)
* Test issue 150
  - Add a test for issue #150 to specify the expected behavior
  - Mark it `skip` pending implementation
* Test split CRLF
  - Add a test for CRLF split between two buffers (a.k.a. issue #150) to specify the expected behavior
  - Mark it `skip` pending implementation
* Test ambiguous CR
  - Modify existing tests for the `\r` row delimiter to specify the behavior in case of CR vs. CRLF ambiguity (issue #150)
  - Mark them `skip` pending implementation
* Keep the line if a new line is ambiguous. Modify the parser to:
  - parse CRLF as a single token
  - keep the current line unparsed if it ends in CR and there's more data

  This solves issues #146 and #150 by ensuring that a CRLF split by a buffer boundary doesn't get treated as two row delimiters (CR + LF).
* v2.2.0
1 parent 179b285 commit 5ac838e

File tree

5 files changed

+45
-17
lines changed

5 files changed

+45
-17
lines changed

History.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
# v2.2.0
2+
3+
* Handle split CRLF [#156](https://github.com/C2FO/fast-csv/pull/156) - [@alexeits](https://github.com/alexeits)
4+
15
# v2.1.0
26

37
* Now handles tab delimited CSVs with only spaces for field values

lib/parser/parser.js

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -143,10 +143,11 @@ function createParser(options) {
143143
}
144144

145145
function getNextToken(line, cursor) {
146-
var token, nextIndex, subStr = line.substr(cursor);
146+
var token, tokenLen, nextIndex, subStr = line.substr(cursor);
147147
if ((nextIndex = subStr.search(NEXT_TOKEN_REGEXP)) !== -1) {
148-
token = line[cursor += nextIndex];
149-
cursor += subStr.match(NEXT_TOKEN_REGEXP)[1].length - 1;
148+
tokenLen = subStr.match(NEXT_TOKEN_REGEXP)[1].length;
149+
token = line.substr(cursor + nextIndex, tokenLen);
150+
cursor += nextIndex + tokenLen - 1;
150151
}
151152
return {token: token, cursor: cursor};
152153
}
@@ -167,6 +168,11 @@ function createParser(options) {
167168
items = [];
168169
lastLineI = i;
169170
} else {
171+
// if the line ends with CR and there is more data, keep it unparsed: the next chunk may start with the LF that completes a CRLF
172+
if (token === '\r' && hasMoreData) {
173+
i = lastLineI;
174+
cursor = null;
175+
}
170176
break;
171177
}
172178
} else if (hasComments && token === COMMENT) {

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "fast-csv",
3-
"version": "2.1.0",
3+
"version": "2.2.0",
44
"description": "CSV parser and writer",
55
"main": "index.js",
66
"scripts": {

test/issues.test.js

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,4 +239,16 @@ it.describe("github issues", function (it) {
239239
});
240240
});
241241
});
242+
243+
it.describe("#150", function (it) {
244+
it.should("not parse a row if a new line is ambiguous and there is more data", function () {
245+
var data = "first_name,last_name,email_address\r";
246+
var myParser = parser({delimiter: ","});
247+
var parsedData = myParser(data, true);
248+
assert.deepEqual(parsedData, {
249+
"line": "first_name,last_name,email_address\r",
250+
"rows": []
251+
});
252+
});
253+
});
242254
});

test/parser.test.js

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ it.describe("fast-csv parser", function (it) {
319319
});
320320

321321
it.should("parse a block of CSV text with a trailing delimiter", function () {
322-
var data = "first_name,last_name,email_address,empty\nFirst1,Last1,email1@email.com,\n";
322+
var data = "first_name,last_name,email_address,empty\rFirst1,Last1,email1@email.com,\r";
323323
var myParser = parser({delimiter: ","});
324324
assert.deepEqual(myParser(data, false), {
325325
"line": "", "rows": [
@@ -330,7 +330,7 @@ it.describe("fast-csv parser", function (it) {
330330
});
331331

332332
it.should("parse a block of CSV text with a trailing delimiter followed by a space", function() {
333-
var data = "first_name,last_name,email_address,empty\nFirst1,Last1,email1@email.com, \n";
333+
var data = "first_name,last_name,email_address,empty\nFirst1,Last1,email1@email.com, \r";
334334
var myParser = parser({ delimiter: "," });
335335
assert.deepEqual(myParser(data, false), {
336336
"line": "", "rows": [
@@ -341,7 +341,7 @@ it.describe("fast-csv parser", function (it) {
341341
});
342342

343343
it.should("parse a block of Space Separated Value text with a trailing delimiter", function() {
344-
var data = "first_name last_name email_address empty\nFirst1 Last1 email1@email.com \n";
344+
var data = "first_name last_name email_address empty\rFirst1 Last1 email1@email.com \r";
345345
var myParser = parser({ delimiter: " " });
346346
assert.deepEqual(myParser(data, false), {
347347
"line": "", "rows": [
@@ -400,15 +400,13 @@ it.describe("fast-csv parser", function (it) {
400400
});
401401
});
402402

403-
it.should("parse a row if a new line is found and there is more data", function () {
403+
it.should("not parse a row if an ambiguous new line is found and there is more data", function () {
404404
var data = "first_name,last_name,email_address\r";
405405
var myParser = parser({delimiter: ","});
406406
var parsedData = myParser(data, true);
407407
assert.deepEqual(parsedData, {
408-
"line": "",
409-
"rows": [
410-
["first_name", "last_name", "email_address"]
411-
]
408+
"line": "first_name,last_name,email_address\r",
409+
"rows": []
412410
});
413411
});
414412

@@ -532,15 +530,13 @@ it.describe("fast-csv parser", function (it) {
532530
});
533531
});
534532

535-
it.should("parse a row if a new line is found and there is more data", function () {
533+
it.should("not parse a row if an ambiguous new line is found and there is more data", function () {
536534
var data = '"first_name","last_name","email_address"\r';
537535
var myParser = parser({delimiter: ","});
538536
var parsedData = myParser(data, true);
539537
assert.deepEqual(parsedData, {
540-
"line": "",
541-
"rows": [
542-
["first_name", "last_name", "email_address"]
543-
]
538+
"line": '"first_name","last_name","email_address"\r',
539+
"rows": []
544540
});
545541
});
546542
});
@@ -614,6 +610,16 @@ it.describe("fast-csv parser", function (it) {
614610
});
615611
});
616612

613+
it.should("not parse a row if a new line is incomplete and there is more data", function () {
614+
var data = "first_name,last_name,email_address\r";
615+
var myParser = parser({delimiter: ","});
616+
var parsedData = myParser(data, true);
617+
assert.deepEqual(parsedData, {
618+
"line": "first_name,last_name,email_address\r",
619+
"rows": []
620+
});
621+
});
622+
617623
it.should("not parse a row if there is a trailing delimiter and there is more data", function () {
618624
var data = "first_name,last_name,email_address,";
619625
var myParser = parser({delimiter: ","});

0 commit comments

Comments
 (0)