Skip to content

Commit

Permalink
csv: convert datetimes with time zones to local date, mostly (WIP)
Browse files Browse the repository at this point in the history
Abandoned for now, because in a non-UTC timezone, dates with no
time/timezone all get adjusted (they are assumed to be 00:00:00 UTC).
  • Loading branch information
simonmichael committed Jun 26, 2021
1 parent fc364cd commit d48083c
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 17 deletions.
34 changes: 22 additions & 12 deletions hledger-lib/Hledger/Read/CsvReader.hs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ import qualified Data.Text.Encoding as T
import qualified Data.Text.IO as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Builder as TB
import Data.Time.Calendar (Day)
import Data.Time (UTCTime, Day, localDay, utcToLocalTime, getCurrentTimeZone, LocalTime (LocalTime))
import Data.Time.Format (parseTimeM, defaultTimeLocale)
import Safe (atMay, headMay, lastMay, readDef, readMay)
import System.Directory (doesFileExist)
Expand All @@ -78,6 +78,7 @@ import Text.Printf (printf)
import Hledger.Data
import Hledger.Utils
import Hledger.Read.Common (aliasesFromOpts, Reader(..),InputOpts(..), amountp, statusp, genericSourcePos, journalFinalise )
import Data.Time.LocalTime (TimeZone)

--- ** doctest setup
-- $setup
Expand Down Expand Up @@ -741,6 +742,7 @@ readJournalFromCsv mrulesfile csvfile csvdata =
-- let (headerlines, datalines) = identifyHeaderLines records
-- mfieldnames = lastMay headerlines

tz <- getCurrentTimeZone
let
-- convert CSV records to transactions
txns = dbg7 "csv txns" $ snd $ mapAccumL
Expand All @@ -750,7 +752,7 @@ readJournalFromCsv mrulesfile csvfile csvdata =
line' = (mkPos . (+1) . unPos) line
pos' = SourcePos name line' col
in
(pos, transactionFromCsvRecord pos' rules r)
(pos, transactionFromCsvRecord pos' rules tz r)
)
(initialPos parsecfilename) records

Expand Down Expand Up @@ -874,8 +876,8 @@ hledgerField = getEffectiveAssignment
-- | Look up the field template assigned to the given hledger field for this
-- CSV record (via 'hledgerField') and pass it through 'renderTemplate' with
-- the same rules and record. Gives Nothing when 'hledgerField' finds nothing.
hledgerFieldValue :: CsvRules -> CsvRecord -> HledgerFieldName -> Maybe Text
hledgerFieldValue rules record fieldname =
  renderTemplate rules record <$> hledgerField rules record fieldname

transactionFromCsvRecord :: SourcePos -> CsvRules -> CsvRecord -> Transaction
transactionFromCsvRecord sourcepos rules record = t
transactionFromCsvRecord :: SourcePos -> CsvRules -> TimeZone -> CsvRecord -> Transaction
transactionFromCsvRecord sourcepos rules tz record = t
where
----------------------------------------------------------------------
-- 1. Define some helpers:
Expand All @@ -884,7 +886,7 @@ transactionFromCsvRecord sourcepos rules record = t
-- ruleval = csvRuleValue rules record :: DirectiveName -> Maybe String
field = hledgerField rules record :: HledgerFieldName -> Maybe FieldTemplate
fieldval = hledgerFieldValue rules record :: HledgerFieldName -> Maybe Text
parsedate = parseDateWithCustomOrDefaultFormats (rule "date-format")
parsedate = parseDateWithCustomOrDefaultFormats tz (rule "date-format")
mkdateerror datefield datevalue mdateformat = T.unpack $ T.unlines
["error: could not parse \""<>datevalue<>"\" as a date using date format "
<>maybe "\"YYYY/M/D\", \"YYYY-M-D\" or \"YYYY.M.D\"" (T.pack . show) mdateformat
Expand Down Expand Up @@ -1269,16 +1271,24 @@ csvFieldValue rules record fieldname = do
fieldvalue <- T.strip <$> atMay record (fieldindex-1)
return fieldvalue

-- | Parse the date string using the specified date-format, or if unspecified
-- the "simple date" formats (YYYY/MM/DD, YYYY-MM-DD, YYYY.MM.DD, leading
-- zeroes optional).
parseDateWithCustomOrDefaultFormats :: Maybe DateFormat -> Text -> Maybe Day
parseDateWithCustomOrDefaultFormats mformat s = asum $ map parsewith formats
-- | Parse a date from a date/datetime string using the specified strptime format,
-- or else try all the "simple date" formats (YYYY/MM/DD, YYYY-MM-DD, YYYY.MM.DD
-- with optional leading zeroes).
--
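-- If the string includes time and time zone, the local date (in the provided
-- local time zone) will be returned. This could be a day earlier or later than
-- the one in the string.
--
-- Caveat: a string with no time or time zone is read as 00:00:00 UTC and then
-- converted like any other, so in a time zone behind UTC its date comes out a
-- day earlier than written.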
parseDateWithCustomOrDefaultFormats :: TimeZone -> Maybe DateFormat -> Text -> Maybe Day
parseDateWithCustomOrDefaultFormats tz mformat s = do
ut <- asum $ map parsewith formats :: Maybe UTCTime
let lt = utcToLocalTime tz ut :: LocalTime
let ld = localDay lt :: Day
return ld
where
parsewith = flip (parseTimeM True defaultTimeLocale) (T.unpack s)
formats = map T.unpack $ maybe
["%Y/%-m/%-d"
,"%Y-%-m-%-d"
["%Y-%-m-%-d"
,"%Y/%-m/%-d"
,"%Y.%-m.%-d"
-- ,"%-m/%-d/%Y"
-- ,parseTime defaultTimeLocale "%Y/%m/%e" (take 5 s ++ "0" ++ drop 5 s)
Expand Down
8 changes: 3 additions & 5 deletions hledger/hledger.m4.md
Original file line number Diff line number Diff line change
Expand Up @@ -3865,11 +3865,9 @@ date-format %-m/%-d/%Y %l:%M %p some other junk
For the supported strptime syntax, see:\
<https://hackage.haskell.org/package/time/docs/Data-Time-Format.html#v:formatTime>

Note that although you can parse date-times which include a time zone,
that time zone is ignored; it will not change the date that is parsed.
This means when reading CSV data with times not in your local time zone,
dates can be "off by one".

Note: date-times which include a time zone different from your local time zone
will usually be parsed as the correct date in your time zone; but in certain situations
involving daylight saving time, the parsed date can be "off by one".

### `decimal-mark`

Expand Down

0 comments on commit d48083c

Please sign in to comment.