% --------------------------------------------------------------
% This is all preamble stuff that you don't have to worry about.
% Head down to where it says "Start here"
% --------------------------------------------------------------
\documentclass[12pt]{article}
\usepackage[margin=1in]{geometry}
\usepackage{amsmath,amsthm,amssymb}
\usepackage{algorithm}
\usepackage{algpseudocode}
\usepackage{hyperref}
\newcommand{\N}{\mathbb{N}}
\newcommand{\Z}{\mathbb{Z}}
\newenvironment{theorem}[2][Theorem]{\begin{trivlist}
\item[\hskip \labelsep {\bfseries #1}\hskip \labelsep {\bfseries #2.}]}{\end{trivlist}}
\newenvironment{lemma}[2][Lemma]{\begin{trivlist}
\item[\hskip \labelsep {\bfseries #1}\hskip \labelsep {\bfseries #2.}]}{\end{trivlist}}
\newenvironment{exercise}[2][Exercise]{\begin{trivlist}
\item[\hskip \labelsep {\bfseries #1}\hskip \labelsep {\bfseries #2.}]}{\end{trivlist}}
\newenvironment{problem}[2][Problem]{\begin{trivlist}
\item[\hskip \labelsep {\bfseries #1}\hskip \labelsep {\bfseries #2.}]}{\end{trivlist}}
\newenvironment{question}[2][Question]{\begin{trivlist}
\item[\hskip \labelsep {\bfseries #1}\hskip \labelsep {\bfseries #2.}]}{\end{trivlist}}
\newenvironment{corollary}[2][Corollary]{\begin{trivlist}
\item[\hskip \labelsep {\bfseries #1}\hskip \labelsep {\bfseries #2.}]}{\end{trivlist}}
\newenvironment{solution}{\begin{proof}[Solution]}{\end{proof}}
\begin{document}
% --------------------------------------------------------------
% Start here
% --------------------------------------------------------------
\title{Analysis}
\author{yourGrand}
\maketitle
Consider a stream $a_1, \dots, a_m$ where each $a_i \in \{1, \dots, n\}$.
\begin{problem}{1}
Give a randomised streaming algorithm which approximates the sum $a_1 + \dots + a_m$ using $O(\log\log m + \log n)$ space.
\begin{algorithm}
\caption{Approximate Stream Sum}
\begin{algorithmic}[1]
\Procedure{Increment}{$counters, a_i, c$} \Comment{Process one stream element}
\State $rands \gets \text{array of } c \text{ uniform random values in } [0, 1)$
\State $probs \gets a_i \times 2^{-counters}$ \Comment{Element-wise: $probs[j] = a_i \cdot 2^{-counters[j]}$}
\For{$j \gets 1$ \textbf{to} $c$}
\If{$rands[j] < probs[j]$}
\State $counters[j] \gets counters[j] + 1$
\EndIf
\EndFor
\EndProcedure
\Statex
\Function{Estimate}{$counters$} \Comment{Estimate the stream sum}
\State $estimates \gets 2^{counters} - 1$ \Comment{Element-wise computation}
\State \Return $\text{mean}(estimates)$
\EndFunction
\Statex
\Function{Approximate\_Stream\_Sum}{$stream, c$}
\State $counters \gets \text{array of } c \text{ zeros}$
\For{each element $a_i$ in $stream$}
\State \Call{Increment}{$counters, a_i, c$}
\EndFor
\State \Return \Call{Estimate}{$counters$}
\EndFunction
\end{algorithmic}
\end{algorithm}
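\paragraph{Implementation sketch:}
The following is a minimal Python sketch of the algorithm above (illustrative only; it is not the repository implementation linked at the end of this section, and all names are placeholders). To keep every increment probability at most $1$, it applies each update as $a_i$ unit increments, the ``splitting into 1s'' view discussed in the space analysis below.
\begin{verbatim}
import random

def increment(counters, a_i):
    # Split the value a_i into a_i unit updates; each unit update
    # increments a counter holding C with probability 2^(-C), as in
    # Morris's algorithm. (Illustrative variant of the pseudocode,
    # which draws once per counter with probability a_i * 2^(-C).)
    for j in range(len(counters)):
        for _ in range(a_i):
            if random.random() < 2.0 ** (-counters[j]):
                counters[j] += 1

def estimate(counters):
    # Each counter C yields the estimate 2^C - 1; average them.
    return sum(2 ** C - 1 for C in counters) / len(counters)

def approximate_stream_sum(stream, c):
    counters = [0] * c
    for a_i in stream:
        increment(counters, a_i)
    return estimate(counters)

# Sanity check: averaged over many runs, the estimate should be close
# to the true sum, with spread shrinking as c grows.
stream = [random.randint(1, 20) for _ in range(200)]
runs = [approximate_stream_sum(stream, c=5) for _ in range(100)]
print(sum(stream), sum(runs) / len(runs))
\end{verbatim}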
\begin{theorem}{1}
The streaming algorithm provides an unbiased estimator of the stream sum
\[
S = \sum_{i=1}^m a_i.
\]
\end{theorem}
\begin{proof}
The algorithm approximates the stream sum by incrementing each counter with a probability that depends on its current value and the incoming element, and then using the final counter values to estimate the sum.
Let \(C_t^{(i)}\) denote the value of the \(i\)-th counter after the first \(t\) elements, and let \(X_i = 2^{C_m^{(i)}} - 1\) be the estimate from the \(i\)-th counter at the end of the stream.
We first analyse a single counter, writing \(C_t\) for its value at time \(t\).
\paragraph{Step 1: Single Counter Estimate}
For a single counter, condition on \(C_{t-1} = h\). The change in \(2^{C_t} - 1\) at time \(t\) is:
\[
\Delta_t =
\begin{cases}
2^{h+1} - 2^h, & \text{if an increment occurs,} \\
0, & \text{otherwise.}
\end{cases}
\]
The probability of an increment, conditioned on the counter value \(C_{t-1} = h\), is:
\[
P(\text{increment} \mid C_{t-1} = h) = a_t \cdot 2^{-h},
\]
assuming \(a_t \cdot 2^{-h} \le 1\). While this quantity exceeds \(1\) (i.e.\ for small \(h\)), the update can instead be applied as \(a_t\) unit increments, each succeeding with probability \(2^{-C}\) for the current counter value \(C\), the ``splitting into 1s'' view used in the space analysis below; each unit increment contributes exactly \(1\) in expectation by the same calculation, so the conclusion below is unchanged.
The expected value of \(\Delta_t\) is then:
\[
\mathbb{E}[\Delta_t \mid C_{t-1} = h] = (2^{h+1} - 2^h) \cdot P(\text{increment} \mid C_{t-1} = h).
\]
Substituting \(P(\text{increment})\):
\[
\mathbb{E}[\Delta_t \mid C_{t-1} = h] = (2^{h+1} - 2^h) \cdot (a_t \cdot 2^{-h}) = a_t \cdot (2 - 1) = a_t.
\]
Since this holds for every value of \(h\), the law of total expectation gives \(\mathbb{E}[\Delta_t] = a_t\): the expected change in \(2^{C_t} - 1\) at time \(t\) is exactly \(a_t\).
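As a concrete check (illustrative numbers): if \(C_{t-1} = 2\) and \(a_t = 3\), the increment probability is \(3 \cdot 2^{-2} = \tfrac{3}{4}\), the jump in \(2^{C_t} - 1\) is \(2^3 - 2^2 = 4\), and so the expected change is \(4 \cdot \tfrac{3}{4} = 3 = a_t\).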
\paragraph{Step 2: Final Estimate}
Since \(C_0 = 0\), the telescoping sum gives \(2^{C_m} - 1 = \sum_{t=1}^m \Delta_t\). By linearity of expectation:
\[
\mathbb{E}[2^{C_m} - 1] = \mathbb{E}\left[\sum_{t=1}^m \Delta_t\right] = \sum_{t=1}^m \mathbb{E}[\Delta_t] = \sum_{t=1}^m a_t = S.
\]
\paragraph{Step 3: Multiple Counters}
The final estimator \(Y\) is the mean of \(c\) independent random variables \(X_1, X_2, \dots, X_c\), one estimate per counter:
\[
Y = \frac{1}{c} \sum_{i=1}^c X_i, \quad \text{where } X_i = 2^{C_m^{(i)}} - 1.
\]
Since each counter is independent and unbiased, the estimator \(Y\) is also unbiased:
\[
\mathbb{E}[Y] = \frac{1}{c} \sum_{i=1}^c \mathbb{E}[X_i] = \frac{1}{c} \sum_{i=1}^c S = S.
\]
\paragraph{Step 4: Variance of the Estimator}
The variance of a single counter estimate \(X_1 = 2^{C_m^{(1)}} - 1\) is:
\[
\text{Var}[X_1] = \mathbb{E}[X_1^2] - \mathbb{E}[X_1]^2.
\]
If \(X_1, X_2, \dots, X_c\) are independent random variables with the same variance \(\text{Var}[X_1]\), the variance of their mean is:
\[
\text{Var}[Y] = \text{Var}\left(\frac{1}{c} \sum_{i=1}^c X_i\right).
\]
By independence and the scaling property of variance:
\[
\text{Var}[Y] = \frac{1}{c^2} \sum_{i=1}^c \text{Var}[X_i].
\]
Since the \(X_i\) are identically distributed (each follows the same update rule on the same stream), they all share the variance \(\text{Var}[X_1]\), giving:
\[
\text{Var}[Y] = \frac{1}{c^2} \cdot c \cdot \text{Var}[X_1] = \frac{\text{Var}[X_1]}{c}.
\]
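For example, averaging \(c = 100\) independent counters leaves the expectation unchanged while shrinking the standard deviation by a factor of \(\sqrt{100} = 10\) relative to a single counter.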
\paragraph{Conclusion:}
The algorithm provides an unbiased estimator for \(S\), with variance that decreases as \(O(1/c)\). By adjusting \(c\), the trade-off between accuracy and space complexity can be controlled.
\end{proof}
\paragraph{Space Complexity Analysis:}
\begin{itemize}
\item In this algorithm, each counter \(C\) is incremented with probability
\(a_i \cdot 2^{-C}\), where \(a_i\) is the value read from the stream.
\item Multiplying by \(a_i\) is equivalent to splitting each stream value into 1s.
For example, a value of 6 is treated as six 1s, each of which increments the
counter with probability \(2^{-C}\), as in Morris's original algorithm.
\item Morris's algorithm approximates a count \(x\) using \(O(\log\log x)\) bits of space.
In our case, the quantity being counted is at most \(m \times n\), where \(m\) is the
number of elements in the stream and \(n\) is the maximum value of any element.
\item Therefore, the space complexity of our algorithm is \(O(c \times \log\log(m \times n))\) bits,
where \(c\) is the number of counters.
\item Since the number of counters \(c\) is a constant, this simplifies to
\(O(\log\log(m \times n)) = O(\log(\log m + \log n))\), which is within (indeed smaller than)
the stated bound \(O(\log\log m + \log n)\); a worked example with concrete numbers follows below.
\end{itemize}
Thus, the space complexity of the algorithm is \( O(\log(\log(m) + \log(n))) \).
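\paragraph{Worked example (illustrative numbers):}
With \(m = 10^9\) elements of value at most \(n = 10^6\), the total count being approximated is at most \(mn = 10^{15}\), so a counter value is at most about \(\log_2(10^{15}) \approx 50\) and fits in \(\lceil \log_2 50 \rceil = 6\) bits, compared with the roughly \(50\) bits needed to store the exact sum.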
\paragraph{Note:}
The implementation of this algorithm in Python can be found on my GitHub:\\
\url{https://github.com/yourGrand/approximate_stream_sum}
\end{problem}
% --------------------------------------------------------------
% You don't have to mess with anything below this line.
% --------------------------------------------------------------
\end{document}