Skip to content

Commit 30cb2a7

Browse files
committed
add more 2d kde handling functions
1 parent 122d701 commit 30cb2a7

File tree

6 files changed

+79
-5
lines changed

6 files changed

+79
-5
lines changed

Manifest.toml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
julia_version = "1.10.0"
44
manifest_format = "2.0"
5-
project_hash = "49de629289277247d16c6846b32eb296b8facd17"
5+
project_hash = "71f408cd0b90fec1446e7c41f51cc12750942e43"
66

77
[[deps.AbstractFFTs]]
88
deps = ["LinearAlgebra"]
@@ -1126,6 +1126,12 @@ deps = ["Dates"]
11261126
uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
11271127
version = "1.0.3"
11281128

1129+
[[deps.TSVD]]
1130+
deps = ["Adapt", "LinearAlgebra"]
1131+
git-tree-sha1 = "c39caef6bae501e5607a6caf68dd9ac6e8addbcb"
1132+
uuid = "9449cd9e-2762-5aa3-a617-5413e99d722e"
1133+
version = "0.4.4"
1134+
11291135
[[deps.Tar]]
11301136
deps = ["ArgTools", "SHA"]
11311137
uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"

Project.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,5 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
1515
ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
1616
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
1717
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
18+
TSVD = "9449cd9e-2762-5aa3-a617-5413e99d722e"
1819
Tullio = "bc48ee85-29a4-5162-ae0b-a64e1601d4bc"

docs/src/MatrixTensorFactor.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,11 @@ DEFAULT_ALPHA
4040
```@docs
4141
default_bandwidth
4242
make_densities
43+
make_densities2d
4344
standardize_KDEs
45+
standardize_2d_KDEs
4446
filter_inner_percentile
47+
filter_2d_inner_percentile
4548
```
4649

4750
## 2D

src/MatrixTensorFactor.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ export nnmtf_proxgrad_online
1919

2020
export DEFAULT_ALPHA, DEFAULT_N_SAMPLES, MIN_STEP, MAX_STEP # Constants
2121
export IMPLIMENTED_OPTIONS, IMPLIMENTED_NORMALIZATIONS, IMPLIMENTED_PROJECTIONS, IMPLIMENTED_CRITERIA, IMPLIMENTED_STEPSIZES # implimented options
22-
export default_bandwidth, make_densities, standardize_KDEs, filter_inner_percentile # Functions
22+
export default_bandwidth, make_densities, standardize_KDEs, standardize_2d_KDEs, filter_inner_percentile, filter_2d_inner_percentile # Functions
2323
export repeatcoord, kde2d, coordzip # 2d density estimation functions
2424

2525
include("utils.jl")

src/densityestimation.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""
2-
Filters elements so only the ones in the inner P percentile remain.
2+
Filters elements so only the ones in the inner P percentile remain. See [`filter_2d_inner_percentile`](@ref).
33
"""
44
function filter_inner_percentile(v, P)
    # `_inrange(v, P)` supplies the predicate that decides which elements of `v` are kept.
    keep = _inrange(v, P)
    return filter(keep, v)
end
55

@@ -97,7 +97,7 @@ function make_densities(
9797
#for (i, (measurement_values, b)) in enumerate(zip(data, bandwidths))
9898
for (i, measurement_values) in enumerate(data)
9999
# Estimate density based on the inner precentile to ignore outliers
100-
#measurement_values = filter_inner_percentile(measurement_values, inner_percentile)
100+
measurement_values = filter_inner_percentile(measurement_values, inner_percentile)
101101
density_estimates[i] = kde(measurement_values)#, bandwidth=b)
102102
end
103103

@@ -124,7 +124,7 @@ const DEFAULT_N_SAMPLES = 64::Integer
124124
"""
125125
standardize_KDEs(KDEs::AbstractVector{UnivariateKDE}; n_samples=DEFAULT_N_SAMPLES,)
126126
127-
Resample the densities so they all are smapled from the same domain.
127+
Resample the densities so they all are sampled from the same domain.
128128
"""
129129
function standardize_KDEs(KDEs; n_samples=DEFAULT_N_SAMPLES,)
130130
a = minimum(d -> d.x[begin], KDEs) # smallest left endpoint

src/densityestimation2d.jl

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,70 @@
22
Holds functions relevant for making 2D kernel density estimates
33
"""
44

5+
"""
    filter_2d_inner_percentile(vs, P)

Keep only the elements of `vs` accepted by the predicate built by `_in2drange(vs, P)`,
i.e. the 2d elements in the inner `P` percentile.

See also [`filter_inner_percentile`](@ref).
"""
function filter_2d_inner_percentile(vs, P)
    keep = _in2drange(vs, P)
    return filter(keep, vs)
end
9+
10+
"""
    _in2drange(vs, P)

Return a predicate `x -> Bool` that is `true` when both `x[1]` and `x[2]` lie in the
inner `P` percentile of the corresponding coordinates of the elements of `vs`.

The bounds are computed once, separately for each coordinate, as the quantiles at
`(100 - P) / 2` percent and `100 - (100 - P) / 2` percent. Both bounds are inclusive.

# Arguments
- `vs`: iterable of elements indexable with `[1]` and `[2]`.
- `P`: width of the inner percentile, in percent (e.g. `95`).
"""
function _in2drange(vs, P)
    p_low = (100 - P) / 2
    p_high = 100 - p_low
    probs = [p_low, p_high] ./ 100 # `quantile` expects probabilities in [0, 1]

    a, b = quantile([v[1] for v in vs], probs)
    c, d = quantile([v[2] for v in vs], probs)

    # Chained comparisons keep both ends of each interval inclusive.
    return x -> (a <= x[1] <= b) && (c <= x[2] <= d)
end
20+
21+
# TODO extend this to an arbitrary number of dimensions
22+
23+
"""
24+
make_densities2d(s::Sink; kwargs...)
25+
make_densities2d(s::Sink, domains::AbstractVector{<:AbstractVector}; kwargs...)
26+
27+
Similar to [`make_densities`](@ref) but performs the KDE on 2 measurements jointly.
28+
"""
29+
function make_densities2d(
30+
data::AbstractVector{T};
31+
inner_percentile::Integer=100,
32+
#bandwidths::AbstractVector{<:Real}=default_bandwidth.(
33+
# collect(eachmeasurement(s)),DEFAULT_ALPHA,inner_percentile),
34+
) where T
35+
# Argument Handeling: check inner_percentile is a percentile
36+
(0 < inner_percentile <= 100) ||
37+
ArgumentError("inner_percentile must be between 0 and 100, got $inner_percentile")
38+
39+
#(length(data[begin]) == 2) ||
40+
# ArgumentError("should only be 2 measurements for the grain in s, got $length(getmeasurements(s))")
41+
42+
#data = filter_2d_inner_percentile(data)
43+
44+
KDE = kde(hcat(collect(array(g) for g in data)...)'; bandwidth=tuple(bandwidths...))
45+
return KDE
46+
end
47+
48+
"""
49+
standardize_2d_KDEs(KDEs::AbstractVector{BivariateKDE}; n_samples=DEFAULT_N_SAMPLES,)
50+
51+
Resample the densities so they all are sampled from the same x and y coordinates.
52+
"""
53+
function standardize_2d_KDEs(KDEs; n_samples=DEFAULT_N_SAMPLES,)
54+
a = minimum(f -> f.x[begin], KDEs) # smallest left endpoint
55+
b = maximum(f -> f.x[end] , KDEs) # biggest right endpoint
56+
c = minimum(f -> f.y[begin], KDEs) # smallest left endpoint
57+
d = maximum(f -> f.y[end] , KDEs) # biggest right endpoint
58+
59+
x_new = range(a, b, length=n_samples) # make the (larger) x-values range
60+
y_new = range(c, d, length=n_samples) # make the (larger) y-values range
61+
KDEs_new = pdf.(KDEs, (x_new,), (y_new,)) # Resample the densities on the new domain.
62+
# Note the second argument is a 1-tuple so that we can
63+
# broadcast over the first argument only, i.e.
64+
# KDEs_new[i] = pdf(KDEs[i], x_new)
65+
return KDEs_new, x_new, y_new
66+
end
67+
68+
569
"""
670
repeatcoord(coordinates, values)
771

0 commit comments

Comments
 (0)