diff --git a/csv-core/src/reader.rs b/csv-core/src/reader.rs index dbd6dc3d..154417c3 100644 --- a/csv-core/src/reader.rs +++ b/csv-core/src/reader.rs @@ -428,14 +428,15 @@ enum NfaState { InQuotedField = 3, InEscapedQuote = 4, InDoubleEscapedQuote = 5, - InComment = 6, + InEscapeSequence = 6, + InComment = 7, // All states below are "final field" states. // Namely, they indicate that a field has been parsed. - EndFieldDelim = 7, + EndFieldDelim = 8, // All states below are "final record" states. // Namely, they indicate that a record has been parsed. - EndRecord = 8, - CRLF = 9, + EndRecord = 9, + CRLF = 10, } /// A list of NFA states that have an explicit representation in the DFA. @@ -805,9 +806,9 @@ impl Reader { self.dfa.classes.add(self.delimiter); if self.quoting { self.dfa.classes.add(self.quote); - if let Some(escape) = self.escape { - self.dfa.classes.add(escape); - } + } + if let Some(escape) = self.escape { + self.dfa.classes.add(escape); } if let Some(comment) = self.comment { self.dfa.classes.add(comment); @@ -970,7 +971,7 @@ impl Reader { match state { End | StartRecord | EndRecord | InComment | CRLF => End, StartField | EndFieldDelim | EndFieldTerm | InField - | InQuotedField | InEscapedQuote | InDoubleEscapedQuote + | InQuotedField | InEscapedQuote | InDoubleEscapedQuote | InEscapeSequence | InRecordTerm => EndRecord, } } @@ -1018,6 +1019,8 @@ impl Reader { (EndFieldDelim, NfaInputAction::Discard) } else if self.term.equals(c) { (EndFieldTerm, NfaInputAction::Epsilon) + } else if !self.quoting && self.escape == Some(c) { + (InEscapeSequence, NfaInputAction::Discard) } else { (InField, NfaInputAction::CopyToOutput) } @@ -1043,6 +1046,7 @@ impl Reader { (InField, NfaInputAction::CopyToOutput) } } + InEscapeSequence => (InField, NfaInputAction::CopyToOutput), InComment => { if b'\n' == c { (StartRecord, NfaInputAction::Discard)