fix millisecond fraction being handled with wrong scale (#65)

davidhewitt · web-flow · commit 11d636598d50 · 2024-06-26T15:50:57.000Z
* add tests for #61 * fix millisecond fraction being handled with wrong scale * also raise error if fraction too long * additional test cases * update comment * fix doctest
diff --git a/README.md b/README.md
@@ -66,10 +66,13 @@ assert_eq!(dt.to_string(), "2022-01-01T12:13:14Z");
 To control the specifics of time parsing you can provide a `TimeConfig`:
 
 ```rust
-use speedate::{DateTime, Date, Time, TimeConfig};
+use speedate::{DateTime, Date, Time, TimeConfig, MicrosecondsPrecisionOverflowBehavior};
 let dt = DateTime::parse_bytes_with_config(
     "1689102037.5586429".as_bytes(),
-    &TimeConfig::builder().unix_timestamp_offset(Some(0)).build(),
+    &TimeConfig::builder()
+        .unix_timestamp_offset(Some(0))
+        .microseconds_precision_overflow_behavior(MicrosecondsPrecisionOverflowBehavior::Truncate)
+        .build(),
 ).unwrap();
 assert_eq!(
     dt,
diff --git a/src/date.rs b/src/date.rs
@@ -56,7 +56,7 @@ impl FromStr for Date {
 
 // 2e10 if greater than this, the number is in ms, if less than or equal, it's in seconds
 // (in seconds this is 11th October 2603, in ms it's 20th August 1970)
-const MS_WATERSHED: i64 = 20_000_000_000;
+pub(crate) const MS_WATERSHED: i64 = 20_000_000_000;
 // 1600-01-01 as a unix timestamp used for from_timestamp below
 const UNIX_1600: i64 = -11_676_096_000;
 // 9999-12-31T23:59:59 as a unix timestamp, used as max allowed value below
@@ -272,11 +272,11 @@ impl Date {
 
     pub(crate) fn timestamp_watershed(timestamp: i64) -> Result<(i64, u32), ParseError> {
         let ts_abs = timestamp.checked_abs().ok_or(ParseError::DateTooSmall)?;
-        let (mut seconds, mut microseconds) = if ts_abs > MS_WATERSHED {
-            (timestamp / 1_000, timestamp % 1_000 * 1000)
-        } else {
-            (timestamp, 0)
-        };
+        if ts_abs <= MS_WATERSHED {
+            return Ok((timestamp, 0));
+        }
+        let mut seconds = timestamp / 1_000;
+        let mut microseconds = ((timestamp % 1_000) * 1000) as i32;
         if microseconds < 0 {
             seconds -= 1;
             microseconds += 1_000_000;
diff --git a/src/datetime.rs b/src/datetime.rs
@@ -1,5 +1,5 @@
-use crate::numbers::{float_parse_bytes, IntFloat};
-use crate::TimeConfigBuilder;
+use crate::date::MS_WATERSHED;
+use crate::{int_parse_bytes, MicrosecondsPrecisionOverflowBehavior, TimeConfigBuilder};
 use crate::{time::TimeConfig, Date, ParseError, Time};
 use std::cmp::Ordering;
 use std::fmt;
@@ -339,14 +339,50 @@ impl DateTime {
     pub fn parse_bytes_with_config(bytes: &[u8], config: &TimeConfig) -> Result<Self, ParseError> {
         match Self::parse_bytes_rfc3339_with_config(bytes, config) {
             Ok(d) => Ok(d),
-            Err(e) => match float_parse_bytes(bytes) {
-                IntFloat::Int(int) => Self::from_timestamp_with_config(int, 0, config),
-                IntFloat::Float(float) => {
-                    let micro = (float.fract() * 1_000_000_f64).round() as u32;
-                    Self::from_timestamp_with_config(float.floor() as i64, micro, config)
+            Err(e) => {
+                let mut split = bytes.splitn(2, |&b| b == b'.');
+                let Some(timestamp) =
+                    int_parse_bytes(split.next().expect("splitn always returns at least one element"))
+                else {
+                    return Err(e);
+                };
+                let float_fraction = split.next();
+                debug_assert!(split.next().is_none()); // at most two elements
+                match float_fraction {
+                    // If fraction exists but is empty (i.e. trailing `.`), allow for backwards compatibility;
+                    // TODO might want to reconsider this later?
+                    Some(b"") | None => Self::from_timestamp_with_config(timestamp, 0, config),
+                    Some(fract) => {
+                        // the fraction carries at most microsecond precision, so it is either:
+                        // - up to 3 digits when the timestamp is in milliseconds (fractions of a ms)
+                        // - or up to 6 digits when the timestamp is in seconds (fractions of a second)
+                        let max_digits = if timestamp > MS_WATERSHED { 3 } else { 6 };
+                        let Some(fract_integers) = int_parse_bytes(fract) else {
+                            return Err(e);
+                        };
+                        if config.microseconds_precision_overflow_behavior
+                            == MicrosecondsPrecisionOverflowBehavior::Error
+                            && fract.len() > max_digits
+                        {
+                            return Err(if timestamp > MS_WATERSHED {
+                                ParseError::MillisecondFractionTooLong
+                            } else {
+                                ParseError::SecondFractionTooLong
+                            });
+                        }
+                        // TODO: When the fraction has more digits than fit (and the overflow
+                        // behaviour is not `Error`), the excess is rounded rather than truncated,
+                        // because that is what the existing behaviour already did. Rounding is
+                        // probably always better than truncating, so we might want to change
+                        // MicrosecondsPrecisionOverflowBehavior and make other uses also round /
+                        // deprecate truncating.
+                        let multiple = 10f64.powf(max_digits as f64 - fract.len() as f64);
+                        Self::from_timestamp_with_config(
+                            timestamp,
+                            (fract_integers as f64 * multiple).round() as u32,
+                            config,
+                        )
+                    }
                 }
-                IntFloat::Err => Err(e),
-            },
+            }
         }
     }
 
diff --git a/src/lib.rs b/src/lib.rs
@@ -118,6 +118,8 @@ pub enum ParseError {
     SecondFractionTooLong,
     /// second fraction digits missing after `.`
     SecondFractionMissing,
+    /// millisecond fraction value is more than 3 digits long
+    MillisecondFractionTooLong,
     /// invalid digit in duration
     DurationInvalidNumber,
     /// `t` character repeated in duration
diff --git a/tests/main.rs b/tests/main.rs
@@ -864,6 +864,13 @@ param_tests! {
     dt_unix1: ok => "1654646400", "2022-06-08T00:00:00";
     dt_unix2: ok => "1654646404", "2022-06-08T00:00:04";
     dt_unix_float: ok => "1654646404.5", "2022-06-08T00:00:04.500000";
+    dt_unix_float_limit: ok => "1654646404.123456", "2022-06-08T00:00:04.123456";
+    dt_unix_float_ms: ok => "1654646404000.5", "2022-06-08T00:00:04.000500";
+    dt_unix_float_ms_limit: ok => "1654646404123.456", "2022-06-08T00:00:04.123456";
+    dt_unix_float_empty: ok => "1654646404.", "2022-06-08T00:00:04";
+    dt_unix_float_ms_empty: ok => "1654646404000.", "2022-06-08T00:00:04";
+    dt_unix_float_too_long: err => "1654646404.1234567", SecondFractionTooLong;
+    dt_unix_float_ms_too_long: err => "1654646404123.4567", MillisecondFractionTooLong;
     dt_short_date: err => "xxx", TooShort;
     dt_short_time: err => "2020-01-01T12:0", TooShort;
     dt: err => "202x-01-01", InvalidCharYear;
@@ -1390,7 +1397,10 @@ fn test_datetime_parse_bytes_does_not_add_offset_for_rfc3339() {
 fn test_datetime_parse_unix_timestamp_from_bytes_with_utc_offset() {
     let time = DateTime::parse_bytes_with_config(
         "1689102037.5586429".as_bytes(),
-        &(TimeConfigBuilder::new().unix_timestamp_offset(Some(0)).build()),
+        &(TimeConfigBuilder::new()
+            .unix_timestamp_offset(Some(0))
+            .microseconds_precision_overflow_behavior(MicrosecondsPrecisionOverflowBehavior::Truncate)
+            .build()),
     )
     .unwrap();
     assert_eq!(time.to_string(), "2023-07-11T19:00:37.558643Z");
@@ -1400,7 +1410,10 @@ fn test_datetime_parse_unix_timestamp_from_bytes_with_utc_offset() {
 fn test_datetime_parse_unix_timestamp_from_bytes_as_naive() {
     let time = DateTime::parse_bytes_with_config(
         "1689102037.5586429".as_bytes(),
-        &(TimeConfigBuilder::new().unix_timestamp_offset(None).build()),
+        &(TimeConfigBuilder::new()
+            .unix_timestamp_offset(None)
+            .microseconds_precision_overflow_behavior(MicrosecondsPrecisionOverflowBehavior::Truncate)
+            .build()),
     )
     .unwrap();
     assert_eq!(time.to_string(), "2023-07-11T19:00:37.558643");