@@ -42,16 +42,25 @@ def _get_histograms(self) -> hist.DatasetHistograms:
4242 computing_histograms .compute_dataset_histograms (
4343 dataset , data_extractors , pipeline_dp .LocalBackend ()))[0 ]
4444
45- def _get_estimator (
45+ def _get_estimator_for_count_and_privacy_id_count (
4646 self ,
4747 metric : pipeline_dp .Metric ,
4848 noise_kind : pipeline_dp .NoiseKind = pipeline_dp .NoiseKind .LAPLACE ,
4949 epsilon : float = 2 ** 0.5 / 2 ,
5050 delta : Optional [float ] = None ,
5151 ):
52- return histogram_error_estimator .create_error_estimator (
52+ return histogram_error_estimator .create_estimator_for_count_and_privacy_id_count (
5353 self ._get_histograms (), epsilon , delta , metric , noise_kind )
5454
55+ def _get_estimator_for_sum (
56+ self ,
57+ noise_kind : pipeline_dp .NoiseKind = pipeline_dp .NoiseKind .LAPLACE ,
58+ epsilon : float = 2 ** 0.5 / 2 ,
59+ delta : Optional [float ] = None ,
60+ ):
61+ return histogram_error_estimator .create_estimator_for_sum (
62+ self ._get_histograms (), epsilon , delta , noise_kind )
63+
5564 @parameterized .named_parameters (
5665 dict (testcase_name = 'count_gaussian' ,
5766 metric = pipeline_dp .Metrics .COUNT ,
@@ -90,25 +99,34 @@ def test_count_get_sigma(self, metric: pipeline_dp.Metric, epsilon: float,
9099 delta : Optional [float ],
91100 noise_kind : pipeline_dp .NoiseKind , l0 : float ,
92101 linf : float , expected : float ):
93- estimator = self ._get_estimator (metric = metric ,
94- epsilon = epsilon ,
95- delta = delta ,
96- noise_kind = noise_kind )
102+ estimator = self ._get_estimator_for_count_and_privacy_id_count (
103+ metric = metric , epsilon = epsilon , delta = delta , noise_kind = noise_kind )
97104 self .assertAlmostEqual (estimator ._get_stddev (l0 , linf ),
98105 expected ,
99106 delta = 1e-10 )
100107
101108 def test_sum_not_supported (self ):
102109 with self .assertRaisesRegex (
103110 ValueError , "Only COUNT and PRIVACY_ID_COUNT are supported" ):
104- self ._get_estimator (pipeline_dp .Metrics .SUM )
111+ self ._get_estimator_for_count_and_privacy_id_count (
112+ pipeline_dp .Metrics .SUM )
105113
106114 @parameterized .parameters ((0 , 1 ), (1 , 9 / 11 ), (2 , 8 / 11 ), (3 , 7 / 11 ),
107115 (9 , 1 / 11 ), (10 , 0 ), (20 , 0 ))
108116 # there are 11 (privacy_id, partition) pairs (from 2 privacy units), when
109117 # l0_bound=1, 9 are dropped (from 1 privacy unit).
110118 def test_get_ratio_dropped_l0 (self , l0_bound , expected ):
111- estimator = self ._get_estimator (pipeline_dp .Metrics .COUNT )
119+ estimator = self ._get_estimator_for_count_and_privacy_id_count (
120+ pipeline_dp .Metrics .COUNT )
121+ self .assertAlmostEqual (estimator .get_ratio_dropped_l0 (l0_bound ),
122+ expected )
123+
124+ @parameterized .parameters ((0 , 1 ), (1 , 9 / 11 ), (2 , 8 / 11 ), (3 , 7 / 11 ),
125+ (9 , 1 / 11 ), (10 , 0 ), (20 , 0 ))
126+ # there are 11 (privacy_id, partition) pairs (from 2 privacy units), when
127+ # l0_bound=1, 9 are dropped (from 1 privacy unit).
128+ def test_get_ratio_dropped_l0_for_sum (self , l0_bound , expected ):
129+ estimator = self ._get_estimator_for_sum ()
112130 self .assertAlmostEqual (estimator .get_ratio_dropped_l0 (l0_bound ),
113131 expected )
114132
@@ -117,7 +135,19 @@ def test_get_ratio_dropped_l0(self, l0_bound, expected):
117135 # there are 30 rows (from 2 privacy units), when linf_bound=1, 19 are
118136 # dropped (from 1 privacy unit, which contributes 20 to 1 partition).
119137 def test_get_ratio_dropped_linf (self , linf_bound , expected ):
120- estimator = self ._get_estimator (pipeline_dp .Metrics .COUNT )
138+ estimator = self ._get_estimator_for_count_and_privacy_id_count (
139+ pipeline_dp .Metrics .COUNT )
140+ self .assertAlmostEqual (estimator .get_ratio_dropped_linf (linf_bound ),
141+ expected )
142+
143+ @parameterized .parameters ((0 , 1 ), (0.5 , 0.89 ), (1 , 0.78 ), (2 , 0.76 ),
144+ (40 , 0 ))
145+ # there is 1 contribution of 40 and 10 contributions of 1.
146+ # total contribution = 1*40+10*1 = 50
147+ # when linf_bound = 0.5, 11*0.5=5.5 is left after contribution bounding,
148+ # i.e. the dropped ratio is (50-5.5)/50 = 0.89
149+ def test_get_ratio_dropped_linf_for_sum (self , linf_bound , expected ):
150+ estimator = self ._get_estimator_for_sum ()
121151 self .assertAlmostEqual (estimator .get_ratio_dropped_linf (linf_bound ),
122152 expected )
123153
@@ -138,10 +168,15 @@ def test_get_ratio_dropped_linf(self, linf_bound, expected):
138168 # rmse2 = sqrt(21*total_ratio_dropped + noise_stddev**2) ~= 19.70177
139169 # rmse = (9*rmse1+rmse2)/10.
140170 def test_estimate_rmse_count (self , l0_bound , linf_bound , expected ):
141- estimator = self ._get_estimator (pipeline_dp .Metrics .COUNT )
171+ estimator = self ._get_estimator_for_count_and_privacy_id_count (
172+ pipeline_dp .Metrics .COUNT )
142173 self .assertAlmostEqual (estimator .estimate_rmse (l0_bound , linf_bound ),
143174 expected )
144175
176+ def test_estimate_rmse_sum (self ):
177+ estimator = self ._get_estimator_for_sum ()
178+ self .assertAlmostEqual (estimator .estimate_rmse (1 , 1 ), 5.93769917 )
179+
145180
146181if __name__ == '__main__' :
147182 absltest .main ()
0 commit comments