@@ -428,14 +428,15 @@ enum NfaState {
428
428
InQuotedField = 3 ,
429
429
InEscapedQuote = 4 ,
430
430
InDoubleEscapedQuote = 5 ,
431
- InComment = 6 ,
431
+ InEscapeSequence = 6 ,
432
+ InComment = 7 ,
432
433
// All states below are "final field" states.
433
434
// Namely, they indicate that a field has been parsed.
434
- EndFieldDelim = 7 ,
435
+ EndFieldDelim = 8 ,
435
436
// All states below are "final record" states.
436
437
// Namely, they indicate that a record has been parsed.
437
- EndRecord = 8 ,
438
- CRLF = 9 ,
438
+ EndRecord = 9 ,
439
+ CRLF = 10 ,
439
440
}
440
441
441
442
/// A list of NFA states that have an explicit representation in the DFA.
@@ -447,6 +448,7 @@ const NFA_STATES: &'static [NfaState] = &[
447
448
NfaState :: InQuotedField ,
448
449
NfaState :: InEscapedQuote ,
449
450
NfaState :: InDoubleEscapedQuote ,
451
+ NfaState :: InEscapeSequence ,
450
452
NfaState :: InComment ,
451
453
NfaState :: EndRecord ,
452
454
NfaState :: CRLF ,
@@ -805,9 +807,9 @@ impl Reader {
805
807
self . dfa . classes . add ( self . delimiter ) ;
806
808
if self . quoting {
807
809
self . dfa . classes . add ( self . quote ) ;
808
- if let Some ( escape ) = self . escape {
809
- self . dfa . classes . add ( escape) ;
810
- }
810
+ }
811
+ if let Some ( escape ) = self . escape {
812
+ self . dfa . classes . add ( escape ) ;
811
813
}
812
814
if let Some ( comment) = self . comment {
813
815
self . dfa . classes . add ( comment) ;
@@ -970,7 +972,7 @@ impl Reader {
970
972
match state {
971
973
End | StartRecord | EndRecord | InComment | CRLF => End ,
972
974
StartField | EndFieldDelim | EndFieldTerm | InField
973
- | InQuotedField | InEscapedQuote | InDoubleEscapedQuote
975
+ | InQuotedField | InEscapedQuote | InDoubleEscapedQuote | InEscapeSequence
974
976
| InRecordTerm => EndRecord ,
975
977
}
976
978
}
@@ -1007,6 +1009,8 @@ impl Reader {
1007
1009
( EndFieldDelim , NfaInputAction :: Discard )
1008
1010
} else if self . term . equals ( c) {
1009
1011
( EndFieldTerm , NfaInputAction :: Epsilon )
1012
+ } else if !self . quoting && self . escape == Some ( c) {
1013
+ ( InEscapeSequence , NfaInputAction :: Discard )
1010
1014
} else {
1011
1015
( InField , NfaInputAction :: CopyToOutput )
1012
1016
}
@@ -1018,6 +1022,8 @@ impl Reader {
1018
1022
( EndFieldDelim , NfaInputAction :: Discard )
1019
1023
} else if self . term . equals ( c) {
1020
1024
( EndFieldTerm , NfaInputAction :: Epsilon )
1025
+ } else if !self . quoting && self . escape == Some ( c) {
1026
+ ( InEscapeSequence , NfaInputAction :: Discard )
1021
1027
} else {
1022
1028
( InField , NfaInputAction :: CopyToOutput )
1023
1029
}
@@ -1043,6 +1049,7 @@ impl Reader {
1043
1049
( InField , NfaInputAction :: CopyToOutput )
1044
1050
}
1045
1051
}
1052
+ InEscapeSequence => ( InField , NfaInputAction :: CopyToOutput ) ,
1046
1053
InComment => {
1047
1054
if b'\n' == c {
1048
1055
( StartRecord , NfaInputAction :: Discard )
@@ -1087,7 +1094,7 @@ impl Reader {
1087
1094
/// be reached by epsilon transitions will never have explicit usage in the
1088
1095
/// DFA.
1089
1096
const TRANS_CLASSES : usize = 7 ;
1090
- const DFA_STATES : usize = 10 ;
1097
+ const DFA_STATES : usize = 11 ;
1091
1098
const TRANS_SIZE : usize = TRANS_CLASSES * DFA_STATES ;
1092
1099
1093
1100
/// The number of possible transition classes. (See the comment on `TRANS_SIZE`
@@ -1119,6 +1126,8 @@ struct Dfa {
1119
1126
in_field : DfaState ,
1120
1127
/// The DFA state corresponding to being inside a quoted field.
1121
1128
in_quoted : DfaState ,
1129
+ /// The DFA state corresponding to being in an escape sequence.
1130
+ in_escape_sequence : DfaState ,
1122
1131
/// The minimum DFA state that indicates a field has been parsed. All DFA
1123
1132
/// states greater than this are also final-field states.
1124
1133
final_field : DfaState ,
@@ -1135,6 +1144,7 @@ impl Dfa {
1135
1144
classes : DfaClasses :: new ( ) ,
1136
1145
in_field : DfaState ( 0 ) ,
1137
1146
in_quoted : DfaState ( 0 ) ,
1147
+ in_escape_sequence : DfaState ( 0 ) ,
1138
1148
final_field : DfaState ( 0 ) ,
1139
1149
final_record : DfaState ( 0 ) ,
1140
1150
}
@@ -1170,6 +1180,7 @@ impl Dfa {
1170
1180
/// Registers the DFA states that the reader refers to by name and stores
/// them on `self`: `in_field`, `in_quoted`, `in_escape_sequence`,
/// `final_field` and `final_record`. Each one is obtained by calling
/// `new_state` with the corresponding `NfaState`.
///
/// NOTE(review): `new_state` is not visible from here. It presumably hands
/// out DFA state ids in call order, which would make the order of these
/// calls significant (`final_field` and `final_record` are used as minimum
/// thresholds) — confirm before reordering.
fn finish ( & mut self ) {
1171
1181
self . in_field = self . new_state ( NfaState :: InField ) ;
1172
1182
self . in_quoted = self . new_state ( NfaState :: InQuotedField ) ;
1183
+ self . in_escape_sequence = self . new_state ( NfaState :: InEscapeSequence ) ;
1173
1184
self . final_field = self . new_state ( NfaState :: EndFieldDelim ) ;
1174
1185
self . final_record = self . new_state ( NfaState :: EndRecord ) ;
1175
1186
}
@@ -1665,6 +1676,15 @@ mod tests {
1665
1676
}
1666
1677
) ;
1667
1678
1679
+ parses_to ! (
1680
+ escape_sequence,
1681
+ "a\\ ,b\\ \\ c,\\ ,fo\" o\\ ,,bar" ,
1682
+ csv![ [ "a,b\\ c" , ",fo\" o," , "bar" ] ] ,
1683
+ |b: & mut ReaderBuilder | {
1684
+ b. quoting( false ) . escape( Some ( b'\\' ) ) ;
1685
+ }
1686
+ ) ;
1687
+
1668
1688
parses_to ! (
1669
1689
delimiter_tabs,
1670
1690
"a\t b" ,
@@ -1863,6 +1883,25 @@ mod tests {
1863
1883
assert_read ! ( rdr, & [ ] , out, 0 , 0 , End ) ;
1864
1884
}
1865
1885
1886
// Test that escape sequences are decoded correctly when the input arrives
// in several chunks, including a chunk that ends on a lone escape byte.
#[test]
fn stream_escape_sequence() {
    use crate::ReadFieldResult::*;

    let out = &mut [0; 10];
    let mut builder = ReaderBuilder::new();
    // Quoting is turned off so that `\` acts as an escape in plain fields.
    let mut rdr = builder.quoting(false).escape(Some(b'\\')).build();

    // 7 input bytes: `\,` -> `,`, then `f`, `\\` -> `\`, `o`, and a trailing
    // `\` that is consumed but writes nothing yet. 4 bytes are written.
    assert_read!(rdr, b("\\,f\\\\o\\"), out, 7, 4, InputEmpty);
    assert_eq!(&out[..4], b(",f\\o"));

    // The leading `,` completes the escape left pending by the previous
    // chunk, so it is copied as data rather than treated as a delimiter.
    // 4 input bytes (`,`, `o`, `\,`) produce 3 output bytes.
    assert_read!(rdr, b(",o\\,"), &mut out[4..], 4, 3, InputEmpty);
    assert_eq!(&out[..7], b(",f\\o,o,"));

    // Empty input signals end of data: the field and record are finished
    // first, and only then does the reader report `End`.
    assert_read!(rdr, &[], out, 0, 0, Field { record_end: true });
    assert_read!(rdr, &[], out, 0, 0, End);
}
1904
+
1866
1905
// Test that empty output buffers don't wreak havoc.
1867
1906
#[ test]
1868
1907
fn stream_empty_output ( ) {
0 commit comments