diff --git a/Cargo.toml b/Cargo.toml
index 2a049570..c8572466 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -25,6 +25,7 @@ csv-core = { path = "csv-core", version = "0.1.11" }
 itoa = "1"
 ryu = "1"
 serde = "1.0.55"
+serde-value = "0.7.0"
 
 [dev-dependencies]
 bstr = { version = "1.7.0", default-features = false, features = ["alloc", "serde"] }
diff --git a/examples/tutorial-read-serde-invalid-03.rs b/examples/tutorial-read-serde-invalid-03.rs
new file mode 100644
index 00000000..74744df0
--- /dev/null
+++ b/examples/tutorial-read-serde-invalid-03.rs
@@ -0,0 +1,30 @@
+#![allow(dead_code)]
+use std::{error::Error, io, process};
+
+use serde::Deserialize;
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "PascalCase")]
+struct Record {
+    latitude: f64,
+    longitude: f64,
+    #[serde(deserialize_with = "csv::invalid_result")]
+    population: Result<u64, String>,
+    city: String,
+    state: String,
+}
+
+fn run() -> Result<(), Box<dyn Error>> {
+    let mut rdr = csv::Reader::from_reader(io::stdin());
+    for result in rdr.deserialize() {
+        let record: Record = result?;
+        println!("{:?}", record);
+    }
+    Ok(())
+}
+
+fn main() {
+    if let Err(err) = run() {
+        println!("{}", err);
+        process::exit(1);
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 314fc368..22af026b 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -300,6 +300,9 @@ impl Default for Trim {
 /// `Option<T>` is deserialized with non-empty but invalid data, then the value
 /// will be `None` and the error will be ignored.
 ///
+/// Use the [`invalid_result`](./fn.invalid_result.html) function if you want to
+/// return the invalid values as `Err` instead of discarding them.
+///
 /// # Example
 ///
 /// This example shows how to parse CSV records with numerical data, even if
@@ -343,3 +346,101 @@ where
 {
     Option::<T>::deserialize(de).or_else(|_| Ok(None))
 }
+
+/// A custom Serde deserializer for possibly invalid `Result<T, String>` fields.
+///
+/// When deserializing CSV data, it is sometimes desirable to separately return
+/// fields with invalid data. For example, there might be a field that is
+/// usually a number, but will occasionally contain garbage data that causes
+/// number parsing to fail.
+///
+/// You might be inclined to use, say, `Result<i32, String>` for fields such as
+/// this. However this will not compile out of the box, because Serde does not
+/// know when to return `Ok` and when to return `Err`.
+///
+/// This function allows you to define the following behavior: if `Result<T,
+/// String>` is deserialized with valid data, then the valid value will be
+/// returned as `Ok`, while if it is deserialized with empty or invalid data,
+/// then the invalid value will be converted to `String` and returned as
+/// `Err`. Note that any invalid UTF-8 bytes are lossily converted to
+/// `String`, so the conversion to `String` itself never fails.
+///
+/// Use the [`invalid_option`](./fn.invalid_option.html) function if you want to
+/// discard the invalid values instead of returning them as `Err`.
+///
+/// # Example
+///
+/// This example shows how to parse CSV records with numerical data, even if
+/// some numerical data is absent or invalid. Without the
+/// `serde(deserialize_with = "...")` annotations, this example would not
+/// compile.
+///
+/// ```
+/// use std::error::Error;
+///
+/// #[derive(Debug, serde::Deserialize, Eq, PartialEq)]
+/// struct Row {
+///     #[serde(deserialize_with = "csv::invalid_result")]
+///     a: Result<i32, String>,
+///     #[serde(deserialize_with = "csv::invalid_result")]
+///     b: Result<i32, String>,
+///     #[serde(deserialize_with = "csv::invalid_result")]
+///     c: Result<i32, String>,
+/// }
+///
+/// # fn main() { example().unwrap(); }
+/// fn example() -> Result<(), Box<dyn Error>> {
+///     let data = "\
+/// a,b,c
+/// 5,\"\",xyz
+/// ";
+///     let mut rdr = csv::Reader::from_reader(data.as_bytes());
+///     if let Some(result) = rdr.deserialize().next() {
+///         let record: Row = result?;
+///         assert_eq!(record, Row { a: Ok(5), b: Err(String::new()), c: Err(String::from("xyz")) });
+///         Ok(())
+///     } else {
+///         Err(From::from("expected at least one record but got none"))
+///     }
+/// }
+/// ```
+pub fn invalid_result<'de, D, T>(
+    de: D,
+) -> result::Result<result::Result<T, String>, D::Error>
+where
+    D: Deserializer<'de>,
+    T: Deserialize<'de>,
+{
+    // Buffer the field as a self-describing value first: it is needed twice,
+    // once to attempt deserializing `T` and once to render the `Err` text.
+    let value = serde_value::Value::deserialize(de)?;
+    // `T::deserialize` consumes its input, hence the clone.
+    let result = T::deserialize(value.clone()).map_err(|_| match value {
+        serde_value::Value::Bool(b) => b.to_string(),
+        serde_value::Value::U8(u) => u.to_string(),
+        serde_value::Value::U16(u) => u.to_string(),
+        serde_value::Value::U32(u) => u.to_string(),
+        serde_value::Value::U64(u) => u.to_string(),
+        serde_value::Value::I8(i) => i.to_string(),
+        serde_value::Value::I16(i) => i.to_string(),
+        serde_value::Value::I32(i) => i.to_string(),
+        serde_value::Value::I64(i) => i.to_string(),
+        serde_value::Value::F32(f) => f.to_string(),
+        serde_value::Value::F64(f) => f.to_string(),
+        serde_value::Value::Char(c) => c.to_string(),
+        serde_value::Value::String(s) => s,
+        serde_value::Value::Unit => String::new(),
+        serde_value::Value::Option(option) => {
+            format!("{:?}", option)
+        }
+        serde_value::Value::Newtype(newtype) => {
+            format!("{:?}", newtype)
+        }
+        serde_value::Value::Seq(seq) => format!("{:?}", seq),
+        serde_value::Value::Map(map) => format!("{:?}", map),
+        serde_value::Value::Bytes(bytes) => {
+            String::from_utf8_lossy(&bytes).into_owned()
+        }
+    });
+    Ok(result)
+}
diff --git a/src/tutorial.rs b/src/tutorial.rs
index 8b96c095..66e0ace2 100644
--- a/src/tutorial.rs
+++ b/src/tutorial.rs
@@ -1136,6 +1136,62 @@ function is a generic helper function that does one very simple thing: when
 applied to `Option` fields, it will convert any deserialization error into a
 `None` value. This is useful when you need to work with messy CSV data.
 
+Sometimes you might need to return invalid fields instead of discarding them.
+For this you can use the similar
+[`invalid_result`](../fn.invalid_result.html)
+function, which works as follows: when applied to `Result<T, String>` fields,
+it will convert any invalid field to a `String` and return it as `Err(string)`.
+Note that any invalid UTF-8 bytes are lossily converted to `String`, so the
+conversion to `String` itself never fails.
+
+This behavior can be achieved with very minor changes to the previous example:
+
+```no_run
+//tutorial-read-serde-invalid-03.rs
+# #![allow(dead_code)]
+# use std::{error::Error, io, process};
+#
+# use serde::Deserialize;
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "PascalCase")]
+struct Record {
+    latitude: f64,
+    longitude: f64,
+    #[serde(deserialize_with = "csv::invalid_result")]
+    population: Result<u64, String>,
+    city: String,
+    state: String,
+}
+
+fn run() -> Result<(), Box<dyn Error>> {
+    let mut rdr = csv::Reader::from_reader(io::stdin());
+    for result in rdr.deserialize() {
+        let record: Record = result?;
+        println!("{:?}", record);
+    }
+    Ok(())
+}
+#
+# fn main() {
+#     if let Err(err) = run() {
+#         println!("{}", err);
+#         process::exit(1);
+#     }
+# }
+```
+
+If you compile and run this last example, then it should run to completion just
+like the previous one but with the following output:
+
+```text
+$ cargo build
+$ ./target/debug/csvtutor < uspop-null.csv
+Record { latitude: 65.2419444, longitude: -165.2716667, population: Err(""), city: "Davidsons Landing", state: "AK" }
+Record { latitude: 60.5544444, longitude: -151.2583333, population: Ok(7610), city: "Kenai", state: "AK" }
+Record { latitude: 33.7133333, longitude: -87.3886111, population: Err(""), city: "Oakman", state: "AL" }
+# ... and more
+```
+
 # Writing CSV
 
 In this section we'll show a few examples that write CSV data. Writing CSV data