diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java index 79a0713e61..ed51358b1e 100644 --- a/src/main/java/org/apache/commons/csv/CSVFormat.java +++ b/src/main/java/org/apache/commons/csv/CSVFormat.java @@ -232,7 +232,7 @@ public CSVFormat getFormat() { * @see Predefined#Default */ public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, - null, null, null, false, false, false, false, false); + null, null, null, false, false, false, false, false,null); /** * Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is @@ -427,7 +427,7 @@ private static boolean isLineBreak(final Character c) { */ public static CSVFormat newFormat(final char delimiter) { return new CSVFormat(delimiter, null, null, null, null, false, false, null, null, null, null, false, false, - false, false, false); + false, false, false,null); } /** @@ -468,6 +468,8 @@ public static CSVFormat valueOf(final String format) { private final String recordSeparator; // for outputs + private final Character recordSeparatorForInput; // for inputs + private final boolean skipHeaderRecord; private final boolean trailingDelimiter; @@ -509,6 +511,8 @@ public static CSVFormat valueOf(final String format) { * TODO * @param trailingDelimiter * TODO + * @param recordSeparatorForInput + * the line separator to use for input * @throws IllegalArgumentException * if the delimiter is a line break character */ @@ -517,7 +521,7 @@ private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMo final boolean ignoreEmptyLines, final String recordSeparator, final String nullString, final Object[] headerComments, final String[] header, final boolean skipHeaderRecord, final boolean allowMissingColumnNames, final boolean ignoreHeaderCase, final boolean trim, - final boolean trailingDelimiter) { + final boolean 
trailingDelimiter,Character recordSeparatorForInput) { this.delimiter = delimiter; this.quoteCharacter = quoteChar; this.quoteMode = quoteMode; @@ -534,6 +538,7 @@ private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMo this.ignoreHeaderCase = ignoreHeaderCase; this.trailingDelimiter = trailingDelimiter; this.trim = trim; + this.recordSeparatorForInput=recordSeparatorForInput; validate(); } @@ -748,6 +753,14 @@ public QuoteMode getQuoteMode() { public String getRecordSeparator() { return recordSeparator; } + /** + * Returns the record separator delimiting input records. + * + * @return the record separator + */ + public Character getRecordSeparatorForInput() { + return recordSeparatorForInput; + } /** * Returns whether to skip the header record. @@ -793,6 +806,7 @@ public int hashCode() { result = prime * result + (ignoreEmptyLines ? 1231 : 1237); result = prime * result + (skipHeaderRecord ? 1231 : 1237); result = prime * result + ((recordSeparator == null) ? 0 : recordSeparator.hashCode()); + result = prime * result + ((recordSeparatorForInput == null) ? 
0 : recordSeparatorForInput.hashCode()); result = prime * result + Arrays.hashCode(header); return result; } @@ -1188,6 +1202,10 @@ public String toString() { sb.append(' '); sb.append("RecordSeparator=<").append(recordSeparator).append('>'); } + if (recordSeparatorForInput != null) { + sb.append(' '); + sb.append("recordSeparatorForInput=<").append(recordSeparatorForInput).append('>'); + } if (getIgnoreEmptyLines()) { sb.append(" EmptyLines:ignored"); } @@ -1311,7 +1329,7 @@ public CSVFormat withAllowMissingColumnNames() { public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNames) { return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter); + skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput); } /** @@ -1346,7 +1364,7 @@ public CSVFormat withCommentMarker(final Character commentMarker) { } return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter); + skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput); } /** @@ -1364,7 +1382,7 @@ public CSVFormat withDelimiter(final char delimiter) { } return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter); + skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput); } /** @@ -1395,7 +1413,7 @@ 
public CSVFormat withEscape(final Character escape) { } return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escape, ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord, - allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter); + allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput); } /** @@ -1550,7 +1568,7 @@ public CSVFormat withHeader(final ResultSetMetaData metaData) throws SQLExceptio public CSVFormat withHeader(final String... header) { return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter); + skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput); } /** @@ -1571,7 +1589,7 @@ public CSVFormat withHeader(final String... header) { public CSVFormat withHeaderComments(final Object... 
headerComments) { return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter); + skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput); } /** @@ -1596,7 +1614,7 @@ public CSVFormat withIgnoreEmptyLines() { public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) { return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter); + skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput); } /** @@ -1622,7 +1640,7 @@ public CSVFormat withIgnoreHeaderCase() { public CSVFormat withIgnoreHeaderCase(final boolean ignoreHeaderCase) { return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter); + skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput); } /** @@ -1647,7 +1665,7 @@ public CSVFormat withIgnoreSurroundingSpaces() { public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) { return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter); + skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, 
trailingDelimiter,recordSeparatorForInput); } /** @@ -1666,7 +1684,7 @@ public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpac public CSVFormat withNullString(final String nullString) { return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter); + skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput); } /** @@ -1697,7 +1715,7 @@ public CSVFormat withQuote(final Character quoteChar) { } return new CSVFormat(delimiter, quoteChar, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord, - allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter); + allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput); } /** @@ -1711,7 +1729,7 @@ public CSVFormat withQuote(final Character quoteChar) { public CSVFormat withQuoteMode(final QuoteMode quoteModePolicy) { return new CSVFormat(delimiter, quoteCharacter, quoteModePolicy, commentMarker, escapeCharacter, ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter); + skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput); } /** @@ -1731,6 +1749,25 @@ public CSVFormat withRecordSeparator(final char recordSeparator) { return withRecordSeparator(String.valueOf(recordSeparator)); } + /** + * Returns a new {@code CSVFormat} with the record separator of the format set to the specified character. + * + *

+ * Note: This setting is used during parsing and does not affect printing. When it is not set, + * parsing recognizes '\n', '\r' and "\r\n" as record separators. + *

+ * + * @param recordSeparatorForInput + * the record separator to use for input. + * + * @return A new CSVFormat that is equal to this but with the specified input record separator + */ + public CSVFormat withRecordSeparatorForInput(final char recordSeparatorForInput) { + return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, + ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, + skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput); + } + /** * Returns a new {@code CSVFormat} with the record separator of the format set to the specified String. * @@ -1749,7 +1786,7 @@ public CSVFormat withRecordSeparator(final char recordSeparator) { public CSVFormat withRecordSeparator(final String recordSeparator) { return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter); + skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput); } /** @@ -1776,7 +1813,7 @@ public CSVFormat withSkipHeaderRecord() { public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) { return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter); + skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput); } /** @@ -1801,7 +1838,7 @@ public CSVFormat withTrailingDelimiter() { public CSVFormat withTrailingDelimiter(final boolean trailingDelimiter) { return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, 
escapeCharacter, ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter); + skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput); } /** @@ -1826,6 +1863,6 @@ public CSVFormat withTrim() { public CSVFormat withTrim(final boolean trim) { return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter); + skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput); } } diff --git a/src/main/java/org/apache/commons/csv/Lexer.java b/src/main/java/org/apache/commons/csv/Lexer.java index 0329c356a6..7e799b1bd3 100644 --- a/src/main/java/org/apache/commons/csv/Lexer.java +++ b/src/main/java/org/apache/commons/csv/Lexer.java @@ -55,6 +55,8 @@ final class Lexer implements Closeable { private final boolean ignoreSurroundingSpaces; private final boolean ignoreEmptyLines; + private final Character recordSeparatorForInput; + /** The input stream */ private final ExtendedBufferedReader reader; @@ -66,6 +68,7 @@ final class Lexer implements Closeable { this.commentStart = mapNullToDisabled(format.getCommentMarker()); this.ignoreSurroundingSpaces = format.getIgnoreSurroundingSpaces(); this.ignoreEmptyLines = format.getIgnoreEmptyLines(); + this.recordSeparatorForInput=format.getRecordSeparatorForInput(); } /** @@ -370,12 +373,16 @@ void trimTrailingSpaces(final StringBuilder buffer) { * @return true if the given or next character is a line-terminator */ boolean readEndOfLine(int ch) throws IOException { - // check if we have \r\n... - if (ch == CR && reader.lookAhead() == LF) { - // note: does not change ch outside of this method! 
- ch = reader.read(); + if (recordSeparatorForInput == null){ + // check if we have \r\n... + if (ch == CR && reader.lookAhead() == LF) { + // note: does not change ch outside of this method! + ch = reader.read(); + } + return ch == LF || ch == CR; + }else{ + return ch == recordSeparatorForInput.charValue(); } - return ch == LF || ch == CR; } boolean isClosed() {