Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add recordSeparatorForInput #16

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 56 additions & 19 deletions src/main/java/org/apache/commons/csv/CSVFormat.java
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ public CSVFormat getFormat() {
* @see Predefined#Default
*/
public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF,
null, null, null, false, false, false, false, false);
null, null, null, false, false, false, false, false,null);

/**
* Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is
Expand Down Expand Up @@ -427,7 +427,7 @@ private static boolean isLineBreak(final Character c) {
*/
public static CSVFormat newFormat(final char delimiter) {
return new CSVFormat(delimiter, null, null, null, null, false, false, null, null, null, null, false, false,
false, false, false);
false, false, false,null);
}

/**
Expand Down Expand Up @@ -468,6 +468,8 @@ public static CSVFormat valueOf(final String format) {

private final String recordSeparator; // for outputs

private final Character recordSeparatorForInput; // for inputs

private final boolean skipHeaderRecord;

private final boolean trailingDelimiter;
Expand Down Expand Up @@ -509,6 +511,8 @@ public static CSVFormat valueOf(final String format) {
* TODO
* @param trailingDelimiter
* TODO
* @param recordSeparatorForInput
* the character that terminates a record when parsing input, or {@code null} to recognize '\r', '\n' and "\r\n"
* @throws IllegalArgumentException
* if the delimiter is a line break character
*/
Expand All @@ -517,7 +521,7 @@ private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMo
final boolean ignoreEmptyLines, final String recordSeparator, final String nullString,
final Object[] headerComments, final String[] header, final boolean skipHeaderRecord,
final boolean allowMissingColumnNames, final boolean ignoreHeaderCase, final boolean trim,
final boolean trailingDelimiter) {
final boolean trailingDelimiter,Character recordSeparatorForInput) {
this.delimiter = delimiter;
this.quoteCharacter = quoteChar;
this.quoteMode = quoteMode;
Expand All @@ -534,6 +538,7 @@ private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMo
this.ignoreHeaderCase = ignoreHeaderCase;
this.trailingDelimiter = trailingDelimiter;
this.trim = trim;
this.recordSeparatorForInput=recordSeparatorForInput;
validate();
}

Expand Down Expand Up @@ -748,6 +753,14 @@ public QuoteMode getQuoteMode() {
public String getRecordSeparator() {
return recordSeparator;
}
/**
* Returns the character that terminates a record when parsing input.
*
* <p>
* When this is {@code null}, the lexer falls back to recognizing '\r', '\n' and "\r\n" as record terminators.
* </p>
*
* @return the input record separator, or {@code null} if none has been set
*/
public Character getRecordSeparatorForInput() {
return recordSeparatorForInput;
}

/**
* Returns whether to skip the header record.
Expand Down Expand Up @@ -793,6 +806,7 @@ public int hashCode() {
result = prime * result + (ignoreEmptyLines ? 1231 : 1237);
result = prime * result + (skipHeaderRecord ? 1231 : 1237);
result = prime * result + ((recordSeparator == null) ? 0 : recordSeparator.hashCode());
result = prime * result + ((recordSeparatorForInput == null) ? 0 : recordSeparatorForInput.hashCode());
result = prime * result + Arrays.hashCode(header);
return result;
}
Expand Down Expand Up @@ -1188,6 +1202,10 @@ public String toString() {
sb.append(' ');
sb.append("RecordSeparator=<").append(recordSeparator).append('>');
}
if (recordSeparatorForInput != null) {
sb.append(' ');
sb.append("recordSeparatorForInput=<").append(recordSeparatorForInput).append('>');
}
if (getIgnoreEmptyLines()) {
sb.append(" EmptyLines:ignored");
}
Expand Down Expand Up @@ -1311,7 +1329,7 @@ public CSVFormat withAllowMissingColumnNames() {
public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNames) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
}

/**
Expand Down Expand Up @@ -1346,7 +1364,7 @@ public CSVFormat withCommentMarker(final Character commentMarker) {
}
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
}

/**
Expand All @@ -1364,7 +1382,7 @@ public CSVFormat withDelimiter(final char delimiter) {
}
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
}

/**
Expand Down Expand Up @@ -1395,7 +1413,7 @@ public CSVFormat withEscape(final Character escape) {
}
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escape, ignoreSurroundingSpaces,
ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord,
allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
}

/**
Expand Down Expand Up @@ -1550,7 +1568,7 @@ public CSVFormat withHeader(final ResultSetMetaData metaData) throws SQLExceptio
public CSVFormat withHeader(final String... header) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
}

/**
Expand All @@ -1571,7 +1589,7 @@ public CSVFormat withHeader(final String... header) {
public CSVFormat withHeaderComments(final Object... headerComments) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
}

/**
Expand All @@ -1596,7 +1614,7 @@ public CSVFormat withIgnoreEmptyLines() {
public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
}

/**
Expand All @@ -1622,7 +1640,7 @@ public CSVFormat withIgnoreHeaderCase() {
public CSVFormat withIgnoreHeaderCase(final boolean ignoreHeaderCase) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
}

/**
Expand All @@ -1647,7 +1665,7 @@ public CSVFormat withIgnoreSurroundingSpaces() {
public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
}

/**
Expand All @@ -1666,7 +1684,7 @@ public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpac
public CSVFormat withNullString(final String nullString) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
}

/**
Expand Down Expand Up @@ -1697,7 +1715,7 @@ public CSVFormat withQuote(final Character quoteChar) {
}
return new CSVFormat(delimiter, quoteChar, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces,
ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord,
allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
}

/**
Expand All @@ -1711,7 +1729,7 @@ public CSVFormat withQuote(final Character quoteChar) {
public CSVFormat withQuoteMode(final QuoteMode quoteModePolicy) {
return new CSVFormat(delimiter, quoteCharacter, quoteModePolicy, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
}

/**
Expand All @@ -1731,6 +1749,25 @@ public CSVFormat withRecordSeparator(final char recordSeparator) {
return withRecordSeparator(String.valueOf(recordSeparator));
}

/**
* Returns a new {@code CSVFormat} with the record separator used for parsing input set to the specified character.
*
* <p>
* <strong>Note:</strong> This setting is consulted by the lexer when parsing. The record separator written
* between output records is configured separately with {@code withRecordSeparator}. Once an input record
* separator is set, only that character ends a record; '\n', '\r' and "\r\n" are recognized as record
* terminators only when no input record separator has been set.
* </p>
*
* @param recordSeparatorForInput
* the record separator to use for input.
*
* @return A new CSVFormat that is equal to this but with the specified input record separator
*/
public CSVFormat withRecordSeparatorForInput(final char recordSeparatorForInput) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
}

/**
* Returns a new {@code CSVFormat} with the record separator of the format set to the specified String.
*
Expand All @@ -1749,7 +1786,7 @@ public CSVFormat withRecordSeparator(final char recordSeparator) {
public CSVFormat withRecordSeparator(final String recordSeparator) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
}

/**
Expand All @@ -1776,7 +1813,7 @@ public CSVFormat withSkipHeaderRecord() {
public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
}

/**
Expand All @@ -1801,7 +1838,7 @@ public CSVFormat withTrailingDelimiter() {
public CSVFormat withTrailingDelimiter(final boolean trailingDelimiter) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
}

/**
Expand All @@ -1826,6 +1863,6 @@ public CSVFormat withTrim() {
public CSVFormat withTrim(final boolean trim) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
}
}
17 changes: 12 additions & 5 deletions src/main/java/org/apache/commons/csv/Lexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ final class Lexer implements Closeable {
private final boolean ignoreSurroundingSpaces;
private final boolean ignoreEmptyLines;

private final Character recordSeparatorForInput;

/** The input stream */
private final ExtendedBufferedReader reader;

Expand All @@ -66,6 +68,7 @@ final class Lexer implements Closeable {
this.commentStart = mapNullToDisabled(format.getCommentMarker());
this.ignoreSurroundingSpaces = format.getIgnoreSurroundingSpaces();
this.ignoreEmptyLines = format.getIgnoreEmptyLines();
this.recordSeparatorForInput=format.getRecordSeparatorForInput();
}

/**
Expand Down Expand Up @@ -370,12 +373,16 @@ void trimTrailingSpaces(final StringBuilder buffer) {
* @return true if the given or next character is a line-terminator
*/
boolean readEndOfLine(int ch) throws IOException {
// check if we have \r\n...
if (ch == CR && reader.lookAhead() == LF) {
// note: does not change ch outside of this method!
ch = reader.read();
if (recordSeparatorForInput == null){
// check if we have \r\n...
if (ch == CR && reader.lookAhead() == LF) {
// note: does not change ch outside of this method!
ch = reader.read();
}
return ch == LF || ch == CR;
}else{
return ch == recordSeparatorForInput.charValue();
}
return ch == LF || ch == CR;
}

boolean isClosed() {
Expand Down