@@ -163,6 +163,45 @@ def test_compute_linf_sum_contributions_histogram(self, testcase_name,
163
163
histogram .name )
164
164
self .assertListEqual (expected , histogram .bins )
165
165
166
@parameterized.parameters(False, True)
def test_compute_linf_sum_contributions_histogram_2_columns(
        self, pre_aggregated: bool):
    """Checks the linf sum contributions histograms for 2-column values.

    Every input value is a pair, and the computation is expected to yield
    exactly one element: a list with one LINF_SUM_CONTRIBUTIONS histogram
    per value column.

    Args:
      pre_aggregated: when True, the rows are first passed through
        pre_aggregation.preaggregate and the "_on_preaggregated_data"
        variant of the computation is exercised; otherwise the raw rows
        are fed to the regular variant.
    """
    # Row format: ((privacy_id, partition), (value_column_0, value_column_1)).
    rows = [
        ((0, 0), (1, 10)),
        ((0, 1), (2, 20)),
        ((0, 1), (3, 30)),
        ((1, 0), (5, 50)),
    ]
    backend = pipeline_dp.LocalBackend()

    linf_type = hist.HistogramType.LINF_SUM_CONTRIBUTIONS
    first_column_bins = [
        hist.FrequencyBin(lower=1.0, upper=1.0004, count=1, sum=1, max=1),
        hist.FrequencyBin(lower=4.9996, upper=5.0, count=2, sum=10, max=5),
    ]
    second_column_bins = [
        hist.FrequencyBin(lower=10.0, upper=10.004, count=1, sum=10, max=10),
        hist.FrequencyBin(lower=49.996, upper=50.0, count=2, sum=100, max=50),
    ]
    expected = [
        hist.Histogram(linf_type, first_column_bins),
        hist.Histogram(linf_type, second_column_bins),
    ]

    if not pre_aggregated:
        compute = (sum_histogram_computation.
                   _compute_linf_sum_contributions_histogram)
    else:
        extractors = pipeline_dp.DataExtractors(
            privacy_id_extractor=lambda row: row[0][0],
            partition_extractor=lambda row: row[0][1],
            value_extractor=lambda row: row[1])
        rows = list(
            pre_aggregation.preaggregate(rows,
                                         backend,
                                         data_extractors=extractors))
        compute = (
            sum_histogram_computation.
            _compute_linf_sum_contributions_histogram_on_preaggregated_data)

    output = list(compute(rows, backend))

    # The whole result is a single element that holds both histograms.
    self.assertLen(output, 1)
    self.assertListEqual(output[0], expected)
204
+
166
205
@parameterized .product (
167
206
(
168
207
dict (testcase_name = 'empty histogram' ,
@@ -307,6 +346,45 @@ def test_compute_partition_sum_histogram(self, testcase_name, input,
307
346
histogram .name )
308
347
self .assertListEqual (expected , histogram .bins )
309
348
349
@parameterized.parameters(False, True)
def test_compute_partition_sum_histogram_2_columns(self,
                                                   pre_aggregated: bool):
    """Checks the per-partition sum histograms for 2-column values.

    Every input value is a pair, and the computation is expected to yield
    exactly one element: a list with one SUM_PER_PARTITION histogram per
    value column.

    Args:
      pre_aggregated: when True, the rows are first passed through
        pre_aggregation.preaggregate and the "_on_preaggregated_data"
        variant of the computation is exercised; otherwise the raw rows
        are fed to the regular variant.
    """
    # Row format: ((privacy_id, partition), (value_column_0, value_column_1)).
    rows = [
        ((0, 0), (1, 10)),
        ((0, 1), (2, 20)),
        ((0, 1), (3, 30)),
        ((1, 0), (5, 50)),
    ]
    backend = pipeline_dp.LocalBackend()

    partition_sum_type = hist.HistogramType.SUM_PER_PARTITION
    first_column_bins = [
        hist.FrequencyBin(lower=5.0, upper=5.0001, count=1, sum=5, max=5),
        hist.FrequencyBin(lower=5.9999, upper=6.0, count=1, sum=6, max=6),
    ]
    second_column_bins = [
        hist.FrequencyBin(lower=50.0, upper=50.001, count=1, sum=50, max=50),
        hist.FrequencyBin(lower=59.999, upper=60.0, count=1, sum=60, max=60),
    ]
    expected = [
        hist.Histogram(partition_sum_type, first_column_bins),
        hist.Histogram(partition_sum_type, second_column_bins),
    ]

    if not pre_aggregated:
        compute = sum_histogram_computation._compute_partition_sum_histogram
    else:
        extractors = pipeline_dp.DataExtractors(
            privacy_id_extractor=lambda row: row[0][0],
            partition_extractor=lambda row: row[0][1],
            value_extractor=lambda row: row[1])
        rows = list(
            pre_aggregation.preaggregate(rows,
                                         backend,
                                         data_extractors=extractors))
        compute = (sum_histogram_computation.
                   _compute_partition_sum_histogram_on_preaggregated_data)

    output = list(compute(rows, backend))

    # The whole result is a single element that holds both histograms.
    self.assertLen(output, 1)
    self.assertListEqual(output[0], expected)
387
+
310
388
311
389
# Script entry point: when this file is executed directly, run absltest.main().
if __name__ == '__main__' :
312
390
absltest .main ()
0 commit comments