Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

confidence interval text + histogram table #211

Merged
merged 11 commits into from
Jan 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 1 addition & 8 deletions dp_wizard/app/analysis_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from dp_wizard.app.components.inputs import log_slider
from dp_wizard.app.components.column_module import column_ui, column_server
from dp_wizard.utils.csv_helper import read_csv_ids_labels, read_csv_ids_names
from dp_wizard.utils.dp_helper import confidence
from dp_wizard.app.components.outputs import output_code_sample, demo_tooltip
from dp_wizard.utils.code_generators import make_privacy_loss_block

Expand Down Expand Up @@ -44,18 +43,12 @@ def analysis_ui():
ui.card(
ui.card_header("Simulation"),
ui.markdown(
f"""
"""
This simulation will assume a normal distribution
between the specified lower and upper bounds.
Until you make a release, your CSV will not be
read except to determine the columns.

The actual value is within the error bar
with {int(confidence * 100)}% confidence.
"""
),
ui.markdown(
"""
What is the approximate number of rows in the dataset?
This number is only used for the simulation
and not the final calculation.
Expand Down
76 changes: 50 additions & 26 deletions dp_wizard/app/components/column_module.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
from logging import info

from htmltools.tags import details, summary
from shiny import ui, render, module, reactive, Inputs, Outputs, Session
from shiny.types import SilentException

from dp_wizard.utils.dp_helper import make_accuracy_histogram
from dp_wizard.utils.shared import plot_histogram
from dp_wizard.utils.code_generators import make_column_config_block
from dp_wizard.app.components.outputs import output_code_sample, demo_tooltip, hide_if
from dp_wizard.utils.dp_helper import confidence


default_weight = "2"
Expand Down Expand Up @@ -42,12 +45,7 @@ def column_ui(): # pragma: no cover
),
ui.output_ui("optional_weight_ui"),
],
[
ui.output_plot("column_plot", height="300px"),
# Make plot smaller than default:
# about the same size as the other column.
output_code_sample("Column Definition", "column_code"),
],
ui.output_ui("histogram_preview_ui"),
col_widths=col_widths, # type: ignore
),
)
Expand Down Expand Up @@ -97,6 +95,27 @@ def _set_bins():
def _set_weight():
weights.set({**weights(), name: input.weight()})

@reactive.calc()
def accuracy_histogram():
    """Compute the simulated accuracy and histogram for this column.

    Reads the lower, upper, bins and weight inputs together with the
    shared ``weights`` mapping, and forwards them to
    ``make_accuracy_histogram``. This column's share of ``epsilon`` is
    its own weight divided by the sum of all weights.

    Returns the result of ``make_accuracy_histogram``, which callers
    unpack as ``(accuracy, histogram)``.

    Raises ``SilentException`` when the weights sum to zero.
    """
    # Inputs are read and converted in the same order as before, so a
    # conversion failure surfaces at the same point.
    lower_bound = float(input.lower())
    upper_bound = float(input.upper())
    n_bins = int(input.bins())
    own_weight = float(input.weight())
    total_weight = sum(map(float, weights().values()))
    info(f"Weight ratio for {name}: {own_weight}/{total_weight}")

    if total_weight == 0:
        # This function is triggered when column is removed;
        # Exit early to avoid divide-by-zero.
        raise SilentException("weights_sum == 0")

    epsilon_share = epsilon * own_weight / total_weight
    return make_accuracy_histogram(
        row_count=row_count,
        lower=lower_bound,
        upper=upper_bound,
        bin_count=n_bins,
        contributions=contributions,
        weighted_epsilon=epsilon_share,
    )

@render.text
def card_header():
    """Render ``name`` as the text of the card header."""
    return name
Expand Down Expand Up @@ -165,26 +184,31 @@ def column_code():
bin_count=int(input.bins()),
)

@render.plot()
def column_plot():
lower_x = float(input.lower())
upper_x = float(input.upper())
bin_count = int(input.bins())
weight = float(input.weight())
weights_sum = sum(float(weight) for weight in weights().values())
info(f"Weight ratio for {name}: {weight}/{weights_sum}")
if weights_sum == 0:
# This function is triggered when column is removed;
# Exit early to avoid divide-by-zero.
return None
accuracy, histogram = make_accuracy_histogram(
row_count=row_count,
lower=lower_x,
upper=upper_x,
bin_count=bin_count,
contributions=contributions,
weighted_epsilon=epsilon * weight / weights_sum,
)
@render.ui
def histogram_preview_ui():
    """Build the histogram preview area for this column.

    The result is a list holding the preview plot output followed by a
    column layout with three parts: the confidence-interval text, a
    collapsible "Data Table" wrapping the data-frame output, and the
    "Column Definition" code sample.
    """
    # Only the accuracy figure is needed here; the histogram itself is
    # consumed by the plot and data-frame renderers.
    accuracy, _ = accuracy_histogram()

    preview_plot = ui.output_plot("histogram_preview_plot", height="300px")
    interval_text = ui.markdown(
        f"The {confidence:.0%} confidence interval is ±{accuracy:.3g}."
    )
    table_disclosure = details(
        summary("Data Table"),
        ui.output_data_frame("data_frame"),
    )
    code_sample = output_code_sample("Column Definition", "column_code")

    return [
        preview_plot,
        ui.layout_columns(interval_text, table_disclosure, code_sample),
    ]

@render.data_frame
def data_frame():
    """Render the histogram from ``accuracy_histogram`` as a data grid."""
    _, histogram_table = accuracy_histogram()
    return render.DataGrid(histogram_table)

@render.plot
def histogram_preview_plot():
accuracy, histogram = accuracy_histogram()
s = "s" if contributions > 1 else ""
title = (
f"Simulated {name}: normal distribution, "
Expand Down
4 changes: 1 addition & 3 deletions dp_wizard/utils/dp_helper.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from typing import Any

import polars as pl
import opendp.prelude as dp

Expand All @@ -19,7 +17,7 @@ def make_accuracy_histogram(
bin_count: int,
contributions: int,
weighted_epsilon: float,
) -> tuple[float, Any]:
) -> tuple[float, pl.DataFrame]:
"""
Creates fake data between lower and upper, and then returns a DP histogram from it.
>>> accuracy, histogram = make_accuracy_histogram(
Expand Down
4 changes: 4 additions & 0 deletions tests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,10 @@ def expect_no_error():
page.get_by_label("grade").check()
expect_visible(simulation)
assert page.get_by_label("Upper").input_value() == new_value
expect_visible("The 95% confidence interval is ±794")
page.get_by_text("Data Table").click()
expect_visible("(0, 2]")

# Add a second column:
# page.get_by_label("blank").check()
# TODO: Test is flaky?
Expand Down
Loading