diff --git a/examples/quickstart.ipynb b/examples/quickstart.ipynb
new file mode 100644
index 00000000..dd52a036
--- /dev/null
+++ b/examples/quickstart.ipynb
@@ -0,0 +1,405 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "collapsed_sections": [],
+ "name": "PipelineDP Quick Start ",
+ "provenance": [],
+ "toc_visible": true
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "bW1gifIe0pUt"
+ },
+ "source": [
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "3Pa1EeIdJyZn"
+ },
+ "source": [
+ "This is a simple example that shows how to calculate anonymized statistics using PipelineDP. The input data is a simulated dataset of visits to a restaurant during a 7-day period. Each visit is characterized by a visitor ID, the entry time, the time spent in the restaurant (in minutes), the amount of money spent (in euros), and the day of the visit. In this colab we use the PipelineDP\n",
+ "Core API to calculate the differentially private count of restaurant visits per day.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "zxcPpZGuAPq8"
+ },
+ "source": [
+ "# Install dependencies and download data\n",
+ "\n",
+ "Run the code below to install the necessary dependencies and to load and explore the input data.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "E8yzpKYNbHTF",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 206
+ },
+ "outputId": "0e60ad12-094a-4e0d-9c44-d8377accc47c",
+ "cellView": "form"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " user_id \n",
+ " enter_time \n",
+ " spent_minutes \n",
+ " spent_money \n",
+ " day \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 580 \n",
+ " 9:27AM \n",
+ " 29 \n",
+ " 17 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 1215 \n",
+ " 9:16AM \n",
+ " 45 \n",
+ " 18 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 448 \n",
+ " 11:55AM \n",
+ " 12 \n",
+ " 16 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 125 \n",
+ " 10:47AM \n",
+ " 27 \n",
+ " 20 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 484 \n",
+ " 11:08AM \n",
+ " 35 \n",
+ " 13 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ " user_id enter_time spent_minutes spent_money day\n",
+ "0 580 9:27AM 29 17 1\n",
+ "1 1215 9:16AM 45 18 1\n",
+ "2 448 11:55AM 12 16 1\n",
+ "3 125 10:47AM 27 20 1\n",
+ "4 484 11:08AM 35 13 1"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 1
+ }
+ ],
+ "source": [
+ "#@markdown Install dependencies and download data\n",
+ "\n",
+ "import os\n",
+ "os.chdir('/content')\n",
+ "!git clone https://github.com/OpenMined/PipelineDP.git\n",
+ "!pip install -r PipelineDP/requirements.dev.txt\n",
+ "\n",
+ "import sys\n",
+ "sys.path.insert(0,'/content/PipelineDP')\n",
+ "\n",
+ "#Download restaurant dataset from github\n",
+ "!wget https://raw.githubusercontent.com/google/differential-privacy/main/examples/go/data/week_data.csv\n",
+ "\n",
+ "from IPython.display import clear_output\n",
+ "clear_output()\n",
+ "\n",
+ "import apache_beam as beam\n",
+ "from apache_beam.runners.portability import fn_api_runner\n",
+ "from apache_beam.runners.interactive import interactive_runner\n",
+ "from apache_beam.runners.interactive.interactive_beam import *\n",
+ "import pyspark\n",
+ "from dataclasses import dataclass\n",
+ "import pipeline_dp\n",
+ "\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "# Load the visits and switch to short snake_case column names.\n",
+ "df = pd.read_csv('week_data.csv').rename(columns={\n",
+ "    'VisitorId': 'user_id',\n",
+ "    'Time entered': 'enter_time',\n",
+ "    'Time spent (minutes)': 'spent_minutes',\n",
+ "    'Money spent (euros)': 'spent_money',\n",
+ "    'Day': 'day',\n",
+ "})\n",
+ "\n",
+ "# One pandas Series per visit; this list is what gets passed to the DP engine.\n",
+ "rows = [visit for _, visit in df.iterrows()]\n",
+ "\n",
+ "# Preview the first few visits.\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Run the pipeline"
+ ],
+ "metadata": {
+ "id": "hzPiLxByC5BJ"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Describe how to read a row: the day is the partition key and the visitor\n",
+ "# is the privacy ID. The aggregated value extractor isn't used in this\n",
+ "# example, so it simply returns a constant.\n",
+ "data_extractors = pipeline_dp.DataExtractors(\n",
+ "    privacy_id_extractor=lambda row: row.user_id,\n",
+ "    partition_extractor=lambda row: row.day,\n",
+ "    value_extractor=lambda row: 1)\n",
+ "\n",
+ "# Aggregation settings.\n",
+ "params = pipeline_dp.AggregateParams(\n",
+ "    # Only the count is requested here, although several metrics can be\n",
+ "    # computed at once.\n",
+ "    metrics=[pipeline_dp.Metrics.COUNT],\n",
+ "    noise_kind=pipeline_dp.NoiseKind.LAPLACE,\n",
+ "    # Contribution bounding: a visitor can contribute to up to 3 days ...\n",
+ "    max_partitions_contributed=3,\n",
+ "    # ... and up to 2 visits per day.\n",
+ "    max_contributions_per_partition=2,\n",
+ "    # The output partition keys are publicly known, so they are listed\n",
+ "    # explicitly: the output should include all week days (1 to 7).\n",
+ "    public_partitions=list(range(1, 8)))\n",
+ "\n",
+ "# Run on the local backend. Other options (Beam or Spark) are possible.\n",
+ "backend = pipeline_dp.LocalBackend()\n",
+ "\n",
+ "# Total privacy budget available to the computation.\n",
+ "budget_accountant = pipeline_dp.NaiveBudgetAccountant(\n",
+ "    total_epsilon=1, total_delta=1e-6)\n",
+ "\n",
+ "# The engine that executes the DP logic.\n",
+ "dp_engine = pipeline_dp.DPEngine(budget_accountant, backend)\n",
+ "\n",
+ "# Build the computational graph for the aggregation. All computations are\n",
+ "# lazy: the returned object is iterable, but iterating it would fail until\n",
+ "# the budget is computed (below). DPEngine.aggregate can be called multiple\n",
+ "# times with different metrics to compute.\n",
+ "lazy_dp_result = dp_engine.aggregate(rows, params, data_extractors)\n",
+ "\n",
+ "# Compute the budget for each DP operation.\n",
+ "budget_accountant.compute_budgets()\n",
+ "\n",
+ "# Iterating the lazy result is what triggers the computation; materialize\n",
+ "# it into a list of actual results.\n",
+ "dp_result = list(lazy_dp_result)\n"
+ ],
+ "metadata": {
+ "id": "rFj2u61qBx0r"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Inspect the result"
+ ],
+ "metadata": {
+ "id": "hfHqnCLcDqpU"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@markdown ##Inspect the result\n",
+ "#@markdown Below you can see the DP and non-DP results.\n",
+ "\n",
+ "# Labels for the 7 week days. Day k in the data is stored at index k - 1.\n",
+ "days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']\n",
+ "\n",
+ "# Compute non-DP result: the raw number of visits on each day.\n",
+ "non_dp_count = [0] * len(days)\n",
+ "for row in rows:\n",
+ "    non_dp_count[row['day'] - 1] += 1\n",
+ "\n",
+ "# Copy the DP result to a list. Each element of dp_result is indexed as\n",
+ "# (day, metrics), and the first metric is taken as the DP count.\n",
+ "dp_count = [0] * len(days)\n",
+ "for count_per_day in dp_result:\n",
+ "    dp_count[count_per_day[0] - 1] = count_per_day[1][0]\n",
+ "\n",
+ "# Plot the non-DP and DP counts side by side for every day.\n",
+ "x = np.arange(len(days))\n",
+ "width = 0.35\n",
+ "fig, ax = plt.subplots()\n",
+ "rects1 = ax.bar(x - width/2, non_dp_count, width, label='non-DP')\n",
+ "rects2 = ax.bar(x + width/2, dp_count, width, label='DP')\n",
+ "ax.set_ylabel('Visit count')\n",
+ "ax.set_title('Count visits per day')\n",
+ "ax.set_xticks(x)\n",
+ "ax.set_xticklabels(days)\n",
+ "ax.legend()\n",
+ "fig.tight_layout()\n",
+ "plt.show()\n"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 297
+ },
+ "id": "sTkYZ0wSbo3h",
+ "outputId": "80ab959d-5a2a-4901-fe10-2b99c1bd090b",
+ "cellView": "form"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ }
+ }
+ ]
+ }
+ ]
+}