V2.2.0 (#157)

doug-martin · web-flow · commit 5ac838e21420 · 2016-09-22T15:26:12.000-05:00
* Test issue 150
  - Add a test for issue #150 to specify the expected behavior
  - Mark it `skip` pending implementation
* Test split CRLF
  - Add a test for CRLF split between two buffers (a.k.a. issue #150) to specify the expected behavior
  - Mark it `skip` pending implementation
* Test ambiguous CR
  - Modify existing tests for `\r` row delimiter to specify the behavior in case of CR vs. CRLF ambiguity (issue #150)
  - Mark them `skip` pending implementation
* Keep the line if a new line is ambiguous
  Modify the parser to
  - parse CRLF as a single token
  - keep the current line unparsed if it ends in CR and there's more data
  This solves issues #146 and #150 by ensuring that a CRLF split by a buffer boundary doesn't get treated as two row delimiters (CR + LF)
* v2.2.0
diff --git a/History.md b/History.md
@@ -1,3 +1,7 @@
+# v2.2.0
+
+* Handle split CRLF [#156](https://github.com/C2FO/fast-csv/pull/156) - [@alexeits](https://github.com/alexeits)
+
 # v2.1.0
 
 * Now handles tab delimited CSVs with only spaces for field values
diff --git a/lib/parser/parser.js b/lib/parser/parser.js
@@ -143,10 +143,11 @@ function createParser(options) {
     }
 
     function getNextToken(line, cursor) {
-        var token, nextIndex, subStr = line.substr(cursor);
+        var token, tokenLen, nextIndex, subStr = line.substr(cursor);
         if ((nextIndex = subStr.search(NEXT_TOKEN_REGEXP)) !== -1) {
-            token = line[cursor += nextIndex];
-            cursor += subStr.match(NEXT_TOKEN_REGEXP)[1].length - 1;
+            tokenLen = subStr.match(NEXT_TOKEN_REGEXP)[1].length;
+            token = line.substr(cursor + nextIndex, tokenLen);
+            cursor += nextIndex + tokenLen - 1;
         }
         return {token: token, cursor: cursor};
     }
@@ -167,6 +168,11 @@ function createParser(options) {
                     items = [];
                     lastLineI = i;
                 } else {
+                    // if ends with CR and there is more data, keep unparsed due to possible coming LF in CRLF
+                    if (token === '\r' && hasMoreData) {
+                        i = lastLineI;
+                        cursor = null;
+                    }
                     break;
                 }
             } else if (hasComments && token === COMMENT) {
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
     "name": "fast-csv",
-    "version": "2.1.0",
+    "version": "2.2.0",
     "description": "CSV parser and writer",
     "main": "index.js",
     "scripts": {
diff --git a/test/issues.test.js b/test/issues.test.js
@@ -239,4 +239,16 @@ it.describe("github issues", function (it) {
             });
         });
     });
+
+    it.describe("#150", function (it) {
+        it.should("not parse a row if a new line is ambiguous and there is more data", function () {
+            var data = "first_name,last_name,email_address\r";
+            var myParser = parser({delimiter: ","});
+            var parsedData = myParser(data, true);
+            assert.deepEqual(parsedData, {
+                "line": "first_name,last_name,email_address\r",
+                "rows": []
+            });
+        });
+    });
 });
diff --git a/test/parser.test.js b/test/parser.test.js
@@ -319,7 +319,7 @@ it.describe("fast-csv parser", function (it) {
             });
 
             it.should("parse a block of CSV text with a trailing delimiter", function () {
-                var data = "first_name,last_name,email_address,empty\nFirst1,Last1,email1@email.com,\n";
+                var data = "first_name,last_name,email_address,empty\rFirst1,Last1,email1@email.com,\r";
                 var myParser = parser({delimiter: ","});
                 assert.deepEqual(myParser(data, false), {
                     "line": "", "rows": [
@@ -330,7 +330,7 @@ it.describe("fast-csv parser", function (it) {
             });
 
             it.should("parse a block of CSV text with a trailing delimiter followed by a space", function() {
-                var data = "first_name,last_name,email_address,empty\nFirst1,Last1,email1@email.com, \n";
+                var data = "first_name,last_name,email_address,empty\nFirst1,Last1,email1@email.com, \r";
                 var myParser = parser({ delimiter: "," });
                 assert.deepEqual(myParser(data, false), {
                     "line": "", "rows": [
@@ -341,7 +341,7 @@ it.describe("fast-csv parser", function (it) {
             });
 
             it.should("parse a block of Space Separated Value text with a trailing delimiter", function() {
-                var data = "first_name last_name email_address empty\nFirst1 Last1 email1@email.com \n";
+                var data = "first_name last_name email_address empty\rFirst1 Last1 email1@email.com \r";
                 var myParser = parser({ delimiter: " " });
                 assert.deepEqual(myParser(data, false), {
                     "line": "", "rows": [
@@ -400,15 +400,13 @@ it.describe("fast-csv parser", function (it) {
                 });
             });
 
-            it.should("parse a row if a new line is found and there is more data", function () {
+            it.should("not parse a row if an ambiguous new line is found and there is more data", function () {
                 var data = "first_name,last_name,email_address\r";
                 var myParser = parser({delimiter: ","});
                 var parsedData = myParser(data, true);
                 assert.deepEqual(parsedData, {
-                    "line": "",
-                    "rows": [
-                        ["first_name", "last_name", "email_address"]
-                    ]
+                    "line": "first_name,last_name,email_address\r",
+                    "rows": []
                 });
             });
 
@@ -532,15 +530,13 @@ it.describe("fast-csv parser", function (it) {
                 });
             });
 
-            it.should("parse a row if a new line is found and there is more data", function () {
+            it.should("not parse a row if an ambiguous new line is found and there is more data", function () {
                 var data = '"first_name","last_name","email_address"\r';
                 var myParser = parser({delimiter: ","});
                 var parsedData = myParser(data, true);
                 assert.deepEqual(parsedData, {
-                    "line": "",
-                    "rows": [
-                        ["first_name", "last_name", "email_address"]
-                    ]
+                    "line": '"first_name","last_name","email_address"\r',
+                    "rows": []
                 });
             });
         });
@@ -614,6 +610,16 @@ it.describe("fast-csv parser", function (it) {
                 });
             });
 
+            it.should("not parse a row if a new line is incomplete and there is more data", function () {
+                var data = "first_name,last_name,email_address\r";
+                var myParser = parser({delimiter: ","});
+                var parsedData = myParser(data, true);
+                assert.deepEqual(parsedData, {
+                    "line": "first_name,last_name,email_address\r",
+                    "rows": []
+                });
+            });
+
             it.should("not parse a row if there is a trailing delimiter and there is more data", function () {
                 var data = "first_name,last_name,email_address,";
                 var myParser = parser({delimiter: ","});

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "fast-csv",`
`3`		`- "version": "2.1.0",`
	`3`	`+ "version": "2.2.0",`
`4`	`4`	`"description": "CSV parser and writer",`
`5`	`5`	`"main": "index.js",`
`6`	`6`	`"scripts": {`