From 5fc4efe09b77d3a02f928c4e9bd30121714a5bc9 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Wed, 18 Oct 2023 16:02:02 +0200 Subject: [PATCH 1/5] rename `_edf_repr` to `edf_header_string` --- src/write.jl | 18 +++++++++--------- test/runtests.jl | 28 ++++++++++++++-------------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/write.jl b/src/write.jl index 65940b4..b264898 100644 --- a/src/write.jl +++ b/src/write.jl @@ -2,14 +2,14 @@ ##### utilities ##### -_edf_repr(value::Union{String,Char}) = value -_edf_repr(date::Date) = uppercase(Dates.format(date, dateformat"dd-u-yyyy")) -_edf_repr(date::DateTime) = Dates.format(date, dateformat"dd\.mm\.yyHH\.MM\.SS") +edf_header_string(value::Union{String,Char}) = value +edf_header_string(date::Date) = uppercase(Dates.format(date, dateformat"dd-u-yyyy")) +edf_header_string(date::DateTime) = Dates.format(date, dateformat"dd\.mm\.yyHH\.MM\.SS") # XXX this is really really hacky and doesn't support use of scientific notation # where appropriate; keep in mind if you do improve this to support scientific # notation, that scientific is NOT allowed in EDF annotation onset/duration fields -function _edf_repr(x::Real) +function edf_header_string(x::Real) result = missing if isinteger(x) str = string(trunc(Int, x)) @@ -39,9 +39,9 @@ function _edf_repr(x::Real) end _edf_metadata_repr(::Missing) = 'X' -_edf_metadata_repr(x) = _edf_repr(x) +_edf_metadata_repr(x) = edf_header_string(x) -function _edf_repr(metadata::T) where {T<:Union{PatientID,RecordingID}} +function edf_header_string(metadata::T) where {T<:Union{PatientID,RecordingID}} header = T <: RecordingID ? 
String["Startdate"] : String[] return join([header; [_edf_metadata_repr(getfield(metadata, name)) for name in fieldnames(T)]], @@ -49,7 +49,7 @@ function _edf_repr(metadata::T) where {T<:Union{PatientID,RecordingID}} end function edf_write(io::IO, value, byte_limit::Integer) - edf_value = _edf_repr(value) + edf_value = edf_header_string(value) sizeof(edf_value) > byte_limit && error("EDF value exceeded byte limit (of $byte_limit bytes) while writing: $value") bytes_written = Base.write(io, edf_value) @@ -144,10 +144,10 @@ function write_tal(io::IO, tal::TimestampedAnnotationList) if !signbit(tal.onset_in_seconds) # otherwise, the `-` will already be in number string bytes_written += Base.write(io, '+') end - bytes_written += Base.write(io, _edf_repr(tal.onset_in_seconds)) + bytes_written += Base.write(io, edf_header_string(tal.onset_in_seconds)) if tal.duration_in_seconds !== nothing bytes_written += Base.write(io, 0x15) - bytes_written += Base.write(io, _edf_repr(tal.duration_in_seconds)) + bytes_written += Base.write(io, edf_header_string(tal.duration_in_seconds)) end if isempty(tal.annotations) bytes_written += Base.write(io, 0x14) diff --git a/test/runtests.jl b/test/runtests.jl index 3a35121..b3ea91c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -118,23 +118,23 @@ const DATADIR = joinpath(@__DIR__, "data") @test eof(io) end - @test EDF._edf_repr(EDF._nearest_representable_edf_time_value(-0.0023405432)) == + @test EDF.edf_header_string(EDF._nearest_representable_edf_time_value(-0.0023405432)) == "-0.00234" - @test EDF._edf_repr(EDF._nearest_representable_edf_time_value(0.0023405432)) == + @test EDF.edf_header_string(EDF._nearest_representable_edf_time_value(0.0023405432)) == "0.002340" - @test EDF._edf_repr(EDF._nearest_representable_edf_time_value(1.002343)) == "1.002343" - @test EDF._edf_repr(EDF._nearest_representable_edf_time_value(1011.05432)) == "1011.054" - @test EDF._edf_repr(EDF._nearest_representable_edf_time_value(-1011.05432)) == + @test 
EDF.edf_header_string(EDF._nearest_representable_edf_time_value(1.002343)) == "1.002343" + @test EDF.edf_header_string(EDF._nearest_representable_edf_time_value(1011.05432)) == "1011.054" + @test EDF.edf_header_string(EDF._nearest_representable_edf_time_value(-1011.05432)) == "-1011.05" - @test EDF._edf_repr(EDF._nearest_representable_edf_time_value(-1013441.5)) == "-1013442" - @test EDF._edf_repr(EDF._nearest_representable_edf_time_value(-1013441.3)) == "-1013441" - @test EDF._edf_repr(34577777) == "34577777" - @test EDF._edf_repr(0.0345) == "0.034500" - @test EDF._edf_repr(-0.02) == "-0.02000" - @test EDF._edf_repr(-187.74445) == "-187.744" - @test_throws ErrorException EDF._edf_repr(123456789) - @test_throws ErrorException EDF._edf_repr(-12345678) - @test_throws ErrorException EDF._edf_repr(0.00000000024) + @test EDF.edf_header_string(EDF._nearest_representable_edf_time_value(-1013441.5)) == "-1013442" + @test EDF.edf_header_string(EDF._nearest_representable_edf_time_value(-1013441.3)) == "-1013441" + @test EDF.edf_header_string(34577777) == "34577777" + @test EDF.edf_header_string(0.0345) == "0.034500" + @test EDF.edf_header_string(-0.02) == "-0.02000" + @test EDF.edf_header_string(-187.74445) == "-187.744" + @test_throws ErrorException EDF.edf_header_string(123456789) + @test_throws ErrorException EDF.edf_header_string(-12345678) + @test_throws ErrorException EDF.edf_header_string(0.00000000024) @test_throws ErrorException EDF.edf_write(IOBuffer(), "hahahahaha", 4) uneven = EDF.read(joinpath(DATADIR, "test_uneven_samp.edf")) From a5f9c89625e1bccb490dfc3cd6d4b9ee429e9f97 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Wed, 18 Oct 2023 17:01:28 +0200 Subject: [PATCH 2/5] better errors --- src/types.jl | 22 ++------- src/write.jl | 116 ++++++++++++++++++++++++++++++++++------------- test/runtests.jl | 39 ++++++++-------- 3 files changed, 105 insertions(+), 72 deletions(-) diff --git a/src/types.jl b/src/types.jl 
index 39e0f50..6564f94 100644 --- a/src/types.jl +++ b/src/types.jl @@ -88,14 +88,9 @@ const ANNOTATIONS_SIGNAL_LABEL = ["EDF Annotations", "BDF Annotations"] EDF.TimestampedAnnotationList A type representing a time-stamped annotations list (TAL). - -Note that this type's constructor may attempt to round given `onset_in_seconds` and -`duration_in_seconds` arguments to their nearest representable values in accordance -with the EDF+ specification, which a) represents these values as ASCII, b) constrains -these values to an 8 character limit, and c) does not allow the use of scientific -notation for these fields. - -See EDF+ specification for details. + +When writing onset and durations, the values will be rounded to the nearest 100 microseconds, +using [`EDF.edf_annotation_time_string`](@ref). # Fields @@ -107,17 +102,6 @@ struct TimestampedAnnotationList onset_in_seconds::Float64 duration_in_seconds::Union{Float64,Nothing} annotations::Vector{String} - function TimestampedAnnotationList(onset_in_seconds, duration_in_seconds, annotations) - onset_in_seconds = _nearest_representable_edf_time_value(onset_in_seconds) - duration_in_seconds = _nearest_representable_edf_time_value(duration_in_seconds) - return new(onset_in_seconds, duration_in_seconds, annotations) - end -end - -_nearest_representable_edf_time_value(::Nothing) = nothing - -function _nearest_representable_edf_time_value(x) - return round(x; digits=(8 - (ndigits(floor(Int, x)) + signbit(x) + isinteger(x)))) end function Base.:(==)(a::TimestampedAnnotationList, b::TimestampedAnnotationList) diff --git a/src/write.jl b/src/write.jl index b264898..660515e 100644 --- a/src/write.jl +++ b/src/write.jl @@ -2,14 +2,18 @@ ##### utilities ##### -edf_header_string(value::Union{String,Char}) = value -edf_header_string(date::Date) = uppercase(Dates.format(date, dateformat"dd-u-yyyy")) -edf_header_string(date::DateTime) = Dates.format(date, dateformat"dd\.mm\.yyHH\.MM\.SS") +edf_header_string(header_field_name, 
value::Union{String,Char}) = value +function edf_header_string(header_field_name, date::Date) + return uppercase(Dates.format(date, dateformat"dd-u-yyyy")) +end +function edf_header_string(header_field_name, date::DateTime) + return Dates.format(date, dateformat"dd\.mm\.yyHH\.MM\.SS") +end # XXX this is really really hacky and doesn't support use of scientific notation # where appropriate; keep in mind if you do improve this to support scientific # notation, that scientific is NOT allowed in EDF annotation onset/duration fields -function edf_header_string(x::Real) +function edf_header_string(header_field_name, x::Real) result = missing if isinteger(x) str = string(trunc(Int, x)) @@ -28,30 +32,60 @@ function edf_header_string(x::Real) end end if !ismissing(result) - if all(c -> c in ('0', '.', '-'), result) - x == 0 && return result - else - return result + roundtrip = parse(Float32, result) + err = abs(roundtrip - x) + tol = 1e-3 + if err > tol + encoding_suggestion = header_field_name in + (:digital_minimum, :digital_maximum, :physical_minimum, + :physical_maximum) ? + """ + We suggest choosing new encoding parameters to accomodate 8-character rendering. + These can be verified with `EDF.edf_header_string`. + """ : "" + throw(ArgumentError(""" + Error writing header field $header_field_name + Value: $x + This value was encoded into an 8-character ASCII string: $result + This yields roundtripping error: $err greater than the allowed tolerance ($tol) + $encoding_suggestion""")) end + return result end - error("failed to fit number into EDF's 8 ASCII character limit: $x") + error("failed to fit header field $header_field_name into EDF's 8 ASCII character limit. 
Got: $x") return nothing end -_edf_metadata_repr(::Missing) = 'X' -_edf_metadata_repr(x) = edf_header_string(x) +_edf_metadata_repr(header_field_name, ::Missing) = 'X' +_edf_metadata_repr(header_field_name, x) = edf_header_string(header_field_name, x) -function edf_header_string(metadata::T) where {T<:Union{PatientID,RecordingID}} +function edf_header_string(header_field_name, + metadata::T) where {T<:Union{PatientID,RecordingID}} header = T <: RecordingID ? String["Startdate"] : String[] - return join([header; - [_edf_metadata_repr(getfield(metadata, name)) for name in fieldnames(T)]], + return join([header + [_edf_metadata_repr(name, getfield(metadata, name)) + for name in fieldnames(T)]], ' ') end -function edf_write(io::IO, value, byte_limit::Integer) - edf_value = edf_header_string(value) +function edf_header_validate(c::AbstractChar) + return Char(32) <= c <= Char(126) +end + +function edf_header_validate(str::AbstractString) + return all(edf_header_validate, str) +end +function edf_write(io::IO, header_field_name, value, byte_limit::Integer; + validate_ascii=true) + edf_value = edf_header_string(header_field_name, value) + if validate_ascii + valid = edf_header_validate(edf_value) + if !valid + throw(ArgumentError("EDF+ specification requires all characters written in a string in the header to use US-ASCII characters between 32 and 126. 
Got: $edf_value from field $header_field_name")) + end + end sizeof(edf_value) > byte_limit && - error("EDF value exceeded byte limit (of $byte_limit bytes) while writing: $value") + error("EDF value exceeded byte limit (of $byte_limit bytes) while writing: $value for field $header_field_name") bytes_written = Base.write(io, edf_value) while bytes_written < byte_limit bytes_written += Base.write(io, UInt8(' ')) @@ -79,29 +113,33 @@ end ##### `write_header` ##### -function write_header(io::IO, file::File) +function write_header(io::IO, file::File; validate_ascii=true) length(file.signals) <= 9999 || error("EDF does not allow files with more than 9999 signals") expected_bytes_written = BYTES_PER_FILE_HEADER + BYTES_PER_SIGNAL_HEADER * length(file.signals) bytes_written = 0 - bytes_written += edf_write(io, file.header.version, 8) - bytes_written += edf_write(io, file.header.patient, 80) - bytes_written += edf_write(io, file.header.recording, 80) - bytes_written += edf_write(io, file.header.start, 16) - bytes_written += edf_write(io, expected_bytes_written, 8) - bytes_written += edf_write(io, file.header.is_contiguous ? "EDF+C" : "EDF+D", 44) - bytes_written += edf_write(io, file.header.record_count, 8) - bytes_written += edf_write(io, file.header.seconds_per_record, 8) - bytes_written += edf_write(io, length(file.signals), 4) + bytes_written += edf_write(io, "version", file.header.version, 8; validate_ascii) + bytes_written += edf_write(io, "patient", file.header.patient, 80; validate_ascii) + bytes_written += edf_write(io, "recording", file.header.recording, 80; validate_ascii) + bytes_written += edf_write(io, "start", file.header.start, 16; validate_ascii) + bytes_written += edf_write(io, "", expected_bytes_written, 8; validate_ascii) + bytes_written += edf_write(io, "is_contiguous", + file.header.is_contiguous ? 
"EDF+C" : "EDF+D", 44; + validate_ascii) + bytes_written += edf_write(io, "record_count", file.header.record_count, 8; + validate_ascii) + bytes_written += edf_write(io, "seconds_per_record", file.header.seconds_per_record, 8; + validate_ascii) + bytes_written += edf_write(io, "", length(file.signals), 4; validate_ascii) signal_headers = SignalHeader.(file.signals) for (field_name, byte_limit) in SIGNAL_HEADER_FIELDS for signal_header in signal_headers field = getfield(signal_header, field_name) - bytes_written += edf_write(io, field, byte_limit) + bytes_written += edf_write(io, field_name, field, byte_limit; validate_ascii) end end - bytes_written += edf_write(io, ' ', 32 * length(file.signals)) + bytes_written += edf_write(io, "", ' ', 32 * length(file.signals); validate_ascii) @assert bytes_written == expected_bytes_written return bytes_written end @@ -139,15 +177,28 @@ function write_signal_record(io::IO, signal::AnnotationsSignal, record_index::In return bytes_written end +""" + edf_annotation_time_string(time_in_seconds) -> String + +Returns a string representing the time in seconds to the nearest 100 microseconds. + +Implemented by: `@sprintf("%.4f", time_in_seconds)`. + +Resolution chosen to match EDFlib: . 
+""" +function edf_annotation_time_string(time_in_seconds) + return @sprintf("%.4f", time_in_seconds) +end + function write_tal(io::IO, tal::TimestampedAnnotationList) bytes_written = 0 if !signbit(tal.onset_in_seconds) # otherwise, the `-` will already be in number string bytes_written += Base.write(io, '+') end - bytes_written += Base.write(io, edf_header_string(tal.onset_in_seconds)) + bytes_written += Base.write(io, edf_annotation_time_string(tal.onset_in_seconds)) if tal.duration_in_seconds !== nothing bytes_written += Base.write(io, 0x15) - bytes_written += Base.write(io, edf_header_string(tal.duration_in_seconds)) + bytes_written += Base.write(io, edf_annotation_time_string(tal.duration_in_seconds)) end if isempty(tal.annotations) bytes_written += Base.write(io, 0x14) @@ -183,7 +234,8 @@ function write(io::IO, file::File) """ throw(ArgumentError(message)) end - return write_header(io, file) + write_signals(io, file) + validate_ascii = !is_bdf(file) + return write_header(io, file; validate_ascii) + write_signals(io, file) end write(path::AbstractString, file::File) = Base.open(io -> write(io, file), path, "w") diff --git a/test/runtests.jl b/test/runtests.jl index b3ea91c..b6f2265 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -74,7 +74,7 @@ const DATADIR = joinpath(@__DIR__, "data") TimestampedAnnotationList(2.5, 2.5, ["type A"])], [TimestampedAnnotationList(5.0, nothing, String[""])]] @test all(signal.records .== expected) - @test AnnotationsSignal(signal.records).samples_per_record == 16 + @test AnnotationsSignal(signal.records).samples_per_record == 17 end end @@ -118,24 +118,21 @@ const DATADIR = joinpath(@__DIR__, "data") @test eof(io) end - @test EDF.edf_header_string(EDF._nearest_representable_edf_time_value(-0.0023405432)) == - "-0.00234" - @test EDF.edf_header_string(EDF._nearest_representable_edf_time_value(0.0023405432)) == - "0.002340" - @test EDF.edf_header_string(EDF._nearest_representable_edf_time_value(1.002343)) == "1.002343" - 
@test EDF.edf_header_string(EDF._nearest_representable_edf_time_value(1011.05432)) == "1011.054" - @test EDF.edf_header_string(EDF._nearest_representable_edf_time_value(-1011.05432)) == - "-1011.05" - @test EDF.edf_header_string(EDF._nearest_representable_edf_time_value(-1013441.5)) == "-1013442" - @test EDF.edf_header_string(EDF._nearest_representable_edf_time_value(-1013441.3)) == "-1013441" - @test EDF.edf_header_string(34577777) == "34577777" - @test EDF.edf_header_string(0.0345) == "0.034500" - @test EDF.edf_header_string(-0.02) == "-0.02000" - @test EDF.edf_header_string(-187.74445) == "-187.744" - @test_throws ErrorException EDF.edf_header_string(123456789) - @test_throws ErrorException EDF.edf_header_string(-12345678) - @test_throws ErrorException EDF.edf_header_string(0.00000000024) - @test_throws ErrorException EDF.edf_write(IOBuffer(), "hahahahaha", 4) + @test EDF.edf_header_string("", -0.0023405432) == "-0.00234" + @test EDF.edf_header_string("", 0.0023405432) == "0.002340" + @test EDF.edf_header_string("", 1.002343) == "1.002343" + @test EDF.edf_header_string("", 1011.05432) == "1011.054" + @test EDF.edf_header_string("", -1011.0543) == "-1011.05" + @test EDF.edf_header_string("", -1013441.5) == "-1013442" + @test EDF.edf_header_string("", -1013441.3) == "-1013441" + @test EDF.edf_header_string("", 34577777) == "34577777" + @test EDF.edf_header_string("", 0.0345) == "0.034500" + @test EDF.edf_header_string("", -0.02) == "-0.02000" + @test EDF.edf_header_string("", -187.74445) == "-187.744" + @test EDF.edf_header_string("", 0.00000000024) == "0.000000" + @test_throws ErrorException EDF.edf_header_string("", 123456789) + @test_throws ErrorException EDF.edf_header_string("", -12345678) + @test_throws ErrorException EDF.edf_write(IOBuffer(), "", "hahahahaha", 4) uneven = EDF.read(joinpath(DATADIR, "test_uneven_samp.edf")) @test sprint(show, uneven) == "EDF.File with 2 16-bit-encoded signals" @@ -169,7 +166,7 @@ const DATADIR = joinpath(@__DIR__, "data") # 
``` mne = map(line -> parse(Float32, line), eachline(joinpath(DATADIR, "mne_values.csv"))) for (a, b) in zip(EDF.decode(signal), mne) - @test a ≈ b atol=0.01 + @test a ≈ b atol = 0.01 end # Truncated files @@ -227,7 +224,7 @@ const DATADIR = joinpath(@__DIR__, "data") for i in 1:8 bdf_values = EDF.decode(bdf.signals[i]) comp_values = EDF.decode(comp.signals[i]) - @test bdf_values ≈ comp_values rtol=0.01 + @test bdf_values ≈ comp_values rtol = 0.01 end # Ensure that BDF files can also be round-tripped mktempdir() do dir From 668e1b4570511986b11a2337a8fd9a09dd6c6ee7 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Wed, 18 Oct 2023 17:09:27 +0200 Subject: [PATCH 3/5] update tests --- test/runtests.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index b6f2265..a190809 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -122,9 +122,9 @@ const DATADIR = joinpath(@__DIR__, "data") @test EDF.edf_header_string("", 0.0023405432) == "0.002340" @test EDF.edf_header_string("", 1.002343) == "1.002343" @test EDF.edf_header_string("", 1011.05432) == "1011.054" - @test EDF.edf_header_string("", -1011.0543) == "-1011.05" - @test EDF.edf_header_string("", -1013441.5) == "-1013442" - @test EDF.edf_header_string("", -1013441.3) == "-1013441" + @test_throws ArgumentError EDF.edf_header_string("", -1011.0543) == "-1011.05" + @test_throws ArgumentError EDF.edf_header_string("", -1013441.5) == "-1013442" + @test_throws ArgumentError EDF.edf_header_string("", -1013441.3) == "-1013441" @test EDF.edf_header_string("", 34577777) == "34577777" @test EDF.edf_header_string("", 0.0345) == "0.034500" @test EDF.edf_header_string("", -0.02) == "-0.02000" From c35fccd029b643e51d979d81399a9d1c516ec311 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Wed, 18 Oct 2023 17:09:43 +0200 Subject: [PATCH 4/5] up --- Project.toml | 2 +- 1 file changed, 
1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 9deffef..5f7fca5 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "EDF" uuid = "ccffbfc1-f56e-50fb-a33b-53d1781b2825" authors = ["Beacon Biosignals, Inc."] -version = "0.7.4" +version = "0.8.0" [deps] BitIntegers = "c3b6d118-76ef-56ca-8cc7-ebb389d030a1" From f6fbea2705f4cd8bbee348d2216e490ec3d8eb63 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Wed, 18 Oct 2023 17:15:04 +0200 Subject: [PATCH 5/5] bump version --- .github/workflows/ci.yml | 2 +- Project.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8a8b63c..aa1103d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,7 +12,7 @@ jobs: fail-fast: false matrix: version: - - '1.4' # Earliest supported version + - '1.6' # Earliest supported version - '1' # Latest release - 'nightly' os: diff --git a/Project.toml b/Project.toml index 5f7fca5..8a0ac44 100644 --- a/Project.toml +++ b/Project.toml @@ -11,7 +11,7 @@ Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" [compat] BitIntegers = "0.2" FilePathsBase = "0.9.13" -julia = "1.4" +julia = "1.6" [extras] FilePathsBase = "48062228-2e41-5def-b9a4-89aafe57970f"