\documentclass[10pt]{amsart}
\input{/Users/longchen1/Dropbox/Math/TexDocument/mysetting.tex}
\begin{document}
\title{ODE Solvers: One-Step Methods}
\author{Long Chen}\date{\today}
\begin{abstract}
This is my brief summary of Chapters 5 and 6 of the book {\em Numerical Analysis} by Walter Gautschi. The discussion is simplified, and more figures are provided for better illustration.
\end{abstract}
\maketitle
\tableofcontents
We consider numerical methods for solving the nonlinear ODE
\begin{equation}\label{ODE}
y' = f(t,y),\quad y(a) = y_0,
\end{equation}
where $t \in \mathbb{R}$ is the independent variable, $y = y(t) \in \mathbb{R}^d$ may be a vector-valued function, and the function $f(t, y)$ is given. Assume $f(t, y)$ is Lipschitz continuous with respect to $y$, i.e., $\|f(t, y_1) - f(t, y_2)\| \leq L\|y_1 - y_2\|$. Under this condition, the solution $y$ to \eqref{ODE} exists and is unique, at least in a neighborhood of $a$. We further assume that the solution exists for all $t \in \mathbb{R}$. The focus here is on how to compute a numerical approximation of $y$.
A noticeable difference from Gautschi's textbook is that the independent variable is changed from $x$ to $t$, which more naturally represents time.
\section{Schemes}
\subsection{Forward Euler Method}
Besides the initial value $y_0$, equation \eqref{ODE} also provides the initial derivative $y'(a) = f(a, y_0)$. Using the Taylor expansion, we obtain
$$y(a+h) \approx y(a) + h y'(a) = y_0 + h f(a, y_0).$$
To clearly distinguish between the exact solution $y$ to \eqref{ODE} and its numerical approximation, we introduce the notation $u_n$ for the numerical approximation, where the subscript $n$ corresponds to the grid point $t_n$. The step size, which may vary at each iteration, is denoted by $h_n$.
The simplest forward (explicit) Euler method can be summarized as follows:
\medskip
\begin{tcolorbox}[colframe=black!15!white, coltitle=white!5!black, title = \bf Forward/Explicit Euler Method]
Let $t_0 = a$, $u_0 = y_0$. For $n = 0, 1, 2, \dots$,
\begin{align*}
t_{n+1} &= t_n + h_n, \\
u_{n+1} &= u_n + h_n f(t_n, u_n).
\end{align*}
\end{tcolorbox}
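\medskip
As a concrete illustration, here is a minimal Python sketch of the forward Euler method; the function name \texttt{forward\_euler} and the test problem $y' = -y$ are our own choices, not from the text.
\begin{verbatim}
import numpy as np

def forward_euler(f, a, b, y0, N):
    """Solve y' = f(t, y), y(a) = y0 on [a, b] with N uniform steps."""
    h = (b - a) / N
    t = a + h * np.arange(N + 1)
    u = np.empty((N + 1,) + np.shape(y0))
    u[0] = y0
    for n in range(N):
        u[n + 1] = u[n] + h * f(t[n], u[n])  # move along the tangent
    return t, u

# Test problem y' = -y, y(0) = 1 with exact solution exp(-t).
t, u = forward_euler(lambda t, y: -y, 0.0, 1.0, 1.0, 100)
print(abs(u[-1] - np.exp(-1.0)))  # first-order error, here about 2e-3
\end{verbatim}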
Geometrically, the exact solution $y$ to \eqref{ODE} is a curve, and the Euler method approximates this curve using a polygon; see Fig. \ref{fig:ODEsolver}. Intuitively, as the length of each side of the polygon approaches zero, the numerical approximation can closely approximate the true solution. Numerical analysis provides a mathematical framework to rigorously analyze the error and, when possible, improve the methods.
\begin{figure}[htbp]
\begin{center}
\includegraphics[width=7cm]{figures/ODEsolver.pdf}
\caption{An example of a one-step method.}
\label{fig:ODEsolver}
\end{center}
\end{figure}
\medskip
\noindent {\bf Question}: The Euler method can be understood as moving along the tangent direction with a small step size. Is there a better direction to move in?
\medskip
\subsection{One-step methods and truncation error}
We first provide a measure of how ``good" a method is. Consider a single interval $[t_n, t_{n+1}]$. The step size $h_n := t_{n+1} - t_n$ may vary but is usually uniform. A generic form of a one-step method is given by
\begin{equation}\label{onestep}
u_{n+1} = u_n + h_n \Phi(t_n, u_n; h_n),
\end{equation}
where $\Phi(t_n, u_n; h_n)$ represents the direction of movement starting from $(t_n, u_n)$. For the forward Euler method, $\Phi(t_n, u_n; h_n) = f(t_n, u_n)$.
Now consider the ODE \eqref{ODE}, $y' = f(t, y)$, restricted to the interval $[t_n, t_{n+1}]$ with the initial condition $y(t_n) = y_n$. The exact solution value at $t_{n+1}$ is denoted by $y_{n+1} = y(t_{n+1})$.
Assume $u_n = y_n$ and use the error $u_{n+1} - y_{n+1}$ as a measure of accuracy. It is easy to see that this error is proportional to the step size. To provide a more consistent comparison, we define the normalized quantity
$$T(t_n, y_n; h_n) := \frac{1}{h_n}(u_{n+1} - y_{n+1}),$$
which is referred to as the {\em truncation error}.
For simplicity, we omit the subscript $n$ and use the subscript $_{\rm next}$ to indicate $n+1$. With this notation, the truncation error can be rewritten as
$$T(t, y; h) = \frac{1}{h}(u_{\rm next} - y_{\rm next}).$$
This relationship is summarized in the following figure.
\begin{figure}[htbp]
\begin{center}
\includegraphics[width=7cm]{figures/TruncationError.pdf}
\caption{One step method and its truncation error.}
\label{fig:Th}
\end{center}
\end{figure}
Note that we add a negative sign in front of the truncation error in Fig. \ref{fig:Th}. The reason is that the length of a line segment is always positive, while $T$, by its definition, can be negative. In the example shown in the figure, $T$ is negative, so $-hT$ represents the distance.
\begin{definition}[Order of a method]
The method is said to have order $p$ if, for some vector norm $\|\cdot\|$,
\begin{equation}\label{eq:order_p}
\|T(t, y; h)\| \leq C h^p,
\end{equation}
uniformly for all $(t, y) \in [a, b] \times \mathbb{R}^d$ and all sufficiently small $h > 0$, with a constant $C$ not depending on $t$, $y$, or $h$.
We express this property briefly as
\begin{equation}\label{eq:order_bigO}
T(t, y; h) = \mathcal{O}(h^p), \quad h \to 0.
\end{equation}
Note that $p > 0$ implies consistency. Usually, $p$ is an integer $\geq 1$. It is called the \emph{exact order} if \eqref{eq:order_p} does not hold for any larger $p$.
\end{definition}
\begin{definition}[Principal error function]
A function $\tau: [a, b] \times \mathbb{R}^d \to \mathbb{R}^d$ that satisfies $\tau(t, y) \not\equiv 0$ and
\begin{equation}\label{eq:principal_error}
T(t, y; h) = \tau(t, y) h^p + \mathcal{O}(h^{p+1}), \quad h \to 0,
\end{equation}
is called the \emph{principal error function}.
\end{definition}
Using the definition of the one-step method (cf. \eqref{onestep}) and the fact that $y' = f(t, y)$, the truncation error can also be expressed as
\begin{equation}\label{eq:Tphi}
T(t, y; h) = \Phi(t, y; h) - \frac{1}{h} \int_{t}^{t+h} f(s, y(s)) \, \dd s.
\end{equation}
This shows that the truncation error represents the discrepancy between the numerical quadrature $\Phi(t, y; h)$ and the average of $f(t, y(t))$ over the interval $[t, t+h]$.
From this perspective, we can immediately see that the forward Euler method has a first-order truncation error:
$$
T_{\rm Euler}(t, y; h) = f(t, y(t)) - \frac{1}{h} \int_{t}^{t+h} f(s, y(s)) \, \dd s = \mathcal{O}(h).
$$
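Keeping one more term of the Taylor expansion identifies the principal error function of the forward Euler method. Writing $f^{[1]}(t) := \frac{\dd}{\dd t} f(t, y(t))$, we have
$$
\frac{1}{h} \int_{t}^{t+h} f(s, y(s)) \, \dd s = f(t, y(t)) + \frac{h}{2} f^{[1]}(t) + \mathcal{O}(h^2),
$$
so $T_{\rm Euler}(t, y; h) = -\frac{1}{2} f^{[1]}(t)\, h + \mathcal{O}(h^2)$, i.e., $\tau_{\rm Euler} = -\frac{1}{2} f^{[1]}$.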
If we use the midpoint rule or the trapezoidal rule for the integral, the order of accuracy improves to second order. In general, numerical quadrature rules of arbitrary order can be applied.
%However, what additional challenges arise when using such methods to solve ODEs?
For instance, by performing a Taylor expansion at $t + h/2$, we find:
$$
f\left(t + \frac{h}{2}, y\left(t + \frac{h}{2}\right)\right) - \frac{1}{h} \int_{t}^{t+h} f(s, y(s)) \, \dd s = \mathcal{O}(h^2).
$$
What is the extra difficulty here compared to standard numerical quadrature?
\subsection{Second-order methods}
The difference and difficulty lie in the fact that $y(t + \frac{h}{2})$ is unknown; we only have the initial value $y(t)$. How can we address this? One approach is to use the forward Euler method to advance by $h/2$ and obtain $u_{\rm half}$ as an approximation of $y(t + \frac{h}{2})$. Specifically, we approximate $f(t + \frac{h}{2}, y(t + \frac{h}{2}))$ with $f(t + \frac{h}{2}, u_{\rm half})$. This leads to the update formula:
$$
u_{\rm next} = u + h f\left(t + \frac{h}{2}, u + \frac{h}{2} f(t, u)\right).
$$
In practice, we can unfold the nested expressions of $f$ to derive the following improved Euler method:
\medskip
\begin{tcolorbox}[colframe=black!15!white, coltitle=white!5!black, title = \bf Improved Euler Method]
Let $t_0 = a$, $u_0 = y_0$. For $n = 0, 1, 2, \dots$,
\begin{align*}
k_1 &= f(t_n, u_n), \\
k_2 &= f(t_n + h_n/2, u_n + k_1 h_n/2), \\
u_{n+1} &= u_n + h_n k_2.
\end{align*}
\end{tcolorbox}
The direction $k_2$ is an improvement over $k_1$ because it reduces the truncation error to $\mathcal{O}(h^2)$.
Similarly, by modifying the trapezoidal method for numerical quadrature, we obtain a method known as the Heun method. It is also a second-order method.
\medskip
\begin{tcolorbox}[colframe=black!15!white, coltitle=white!5!black, title = \bf Heun Method]
Let $t_0=a,\, u_0 = y_0$. For $n=0,1,2,...,$
\begin{align*}
k_1 &= f(t_n, u_n),\\
k_2 & = f(t_n + h_n, u_n + k_1h_n),\\
u_{n+1} &= u_n + h_n (k_1 + k_2)/2.
\end{align*}
\end{tcolorbox}
\begin{figure}[htbp]
\begin{center}
\includegraphics[width=7.25cm]{figures/EulerMethod.pdf}
\caption{Forward Euler, improved Euler and Heun methods}
\label{fig:EulerMethod}
\end{center}
\end{figure}
Notice that both the improved Euler method and Heun's method require evaluating the slope twice; see Fig.~\ref{fig:EulerMethod}. These are special cases of two-stage methods, which will be discussed below.
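Both updates take only a few lines of code. The following Python sketch (the names \texttt{improved\_euler\_step} and \texttt{heun\_step} are ours) implements a single step of each method.
\begin{verbatim}
def improved_euler_step(f, t, u, h):
    k1 = f(t, u)
    k2 = f(t + h / 2, u + h / 2 * k1)  # slope at the midpoint
    return u + h * k2

def heun_step(f, t, u, h):
    k1 = f(t, u)
    k2 = f(t + h, u + h * k1)          # slope at the right endpoint
    return u + h * (k1 + k2) / 2       # trapezoidal average
\end{verbatim}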
\subsection{Two-stage methods}
A generic form of two-stage methods is given by
\begin{equation}\label{eq:twostage}
u_{\rm next} = u + (\alpha_1 h) k_1 + (\alpha_2 h) k_2,
\end{equation}
where the weight $\alpha_1 \in [0,1)$, and $\alpha_1 + \alpha_2 = 1$. The geometric interpretation is that we first move along the direction $k_1$ with a step size of $\alpha_1 h$ and then switch to a better direction $k_2$ for the remaining step size.
We introduce another parameter $\mu \in (0,1]$ and treat $k_2$ as an approximation of the slope at $(t + \mu h, y(t + \mu h))$. Since $y(t + \mu h)$ is unknown, it is approximated using the forward Euler method, i.e., $k_2 = f(t + \mu h, y + \mu h k_1)$.
Next, we aim to choose the parameters $(\alpha_1, \alpha_2, \mu)$ to minimize the truncation error as formulated in \eqref{eq:Tphi}. The primary tool for this is a Taylor expansion at the left endpoint $(t, y)$. First,
\begin{align*}
k_2 &= f(t + \mu h, y + \mu h k_1) \\
&= f + \mu h f_t + \mu h k_1 f_y + \frac{1}{2} \left[ \mu^2 h^2 f_{tt} + 2 \mu^2 h^2 f_{ty} k_1 + \mu^2 h^2 k_1^{\intercal} f_{yy} k_1 \right] + \mathcal{O}(h^3).
\end{align*}
Recall that $k_1 = f = f(t, y)$. Substituting this into the expression for $\Phi(t, y; h)$, we obtain
\begin{align*}
\Phi(t, y; h) &= \alpha_1 k_1 + \alpha_2 k_2 \\
&= f + \alpha_2 \mu h (f_t + f_y f) + \frac{\alpha_2}{2} \mu^2 h^2 \left[ f_{tt} + 2 f_{ty} f + f^{\intercal} f_{yy} f \right] + \mathcal{O}(h^3).
\end{align*}
Then we compare $\Phi(t, y; h)$ with $\frac{1}{h} \int_{t}^{t+h} f(s, y(s)) \, \dd s$. We expand the integrand \( f(s, y(s)) \) at \((t, y)\):
\begin{equation}\label{eq:fTaylor}
f(s, y(s)) = f + \frac{\dd f}{\dd s}(s - t) + \frac{1}{2} \frac{\dd^2 f}{\dd s^2}(s - t)^2 + \mathcal{O}(h^3).
\end{equation}
Since the second variable \( y \) is also a function of the first one, the derivatives, by the chain rule, are given as
\begin{align*}
f^{[1]}(t, y(t)) &:= \frac{\dd f}{\dd t}(t, y(t)) = f_t + f_y y' = f_t + f_y f, \\
f^{[2]}(t, y(t)) &:= \frac{\dd f^{[1]}}{\dd t}(t, y(t)) = f_t^{[1]} + f_y^{[1]} f \\
&= f_{tt} + 2 f_{ty} f + f^{\intercal} f_{yy} f + f_y f^{[1]}.
\end{align*}
Higher derivatives \( f^{[k]} := \frac{\dd^k f}{\dd t^k} \) can be computed recursively.
Substituting \eqref{eq:fTaylor} into \( h^{-1} \int_{t}^{t+h} f(s, y(s)) \dd s \), and combining this with the expansion of \( \Phi \), we obtain
\begin{align*}
T(t, y; h) = & (\alpha_1 + \alpha_2 - 1) f + \left( \alpha_2 \mu - \frac{1}{2} \right) h f^{[1]} - \red{\frac{1}{6} h^2 f_y f^{[1]}} \\
& + \frac{1}{2} h^2 \left( \alpha_2 \mu^2 - \frac{1}{3} \right) (f_{tt} + 2 f_{ty} f + f^{\intercal} f_{yy} f) + \mathcal{O}(h^3).
\end{align*}
Since the \textcolor{red}{red term} is generally nonzero, the two-stage methods are limited to at most second-order accuracy. We achieve second-order accuracy by choosing parameters that satisfy the relations
\[
\alpha_1 + \alpha_2 = 1, \quad \alpha_2 \mu = \frac{1}{2}.
\]
Examples of $(\alpha_1, \alpha_2, \mu) = (1- \alpha_2, \alpha_2, 1/(2\alpha_2))$ include
\begin{itemize}
\item Improved Euler method: $(0, 1, 1/2)$;
\item Heun method: $(1/2, 1/2, 1)$;
\item The choice $(1/4, 3/4, 2/3)$ satisfies $\alpha_2 \mu^2 = 1/3$ and thus also removes the non-red part of the $h^2$ term.
\end{itemize}
Notice that the choice \((1, 0, 0)\) corresponds to the forward Euler method, which only achieves a truncation order of \( \mathcal{O}(h) \).
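A quick numerical check of the order (our own test, on $y' = -y$): halving $h$ should reduce the global error of a second-order method by a factor of about $2^2 = 4$.
\begin{verbatim}
import numpy as np

def improved_euler(f, a, b, y0, N):
    h = (b - a) / N
    t, u = a, y0
    for n in range(N):
        k1 = f(t, u)
        k2 = f(t + h / 2, u + h / 2 * k1)
        t, u = t + h, u + h * k2
    return u

f = lambda t, y: -y                   # test problem y' = -y
exact = np.exp(-1.0)
e1 = abs(improved_euler(f, 0.0, 1.0, 1.0, 100) - exact)
e2 = abs(improved_euler(f, 0.0, 1.0, 1.0, 200) - exact)
print(e1 / e2)                        # approximately 4
\end{verbatim}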
\subsection{Runge-Kutta methods}
A generalization of two-stage methods is the family of $r$-stage Runge-Kutta methods:
\begin{align*}
\Phi(t, y; h) &= \sum_{s=1}^r \alpha_s k_s,\\
k_1 &= f(t, y);\\
k_s & = f(t+\mu_s h, y + h \sum_{j=1}^{s-1}\lambda_{sj}k_j), \quad s = 2,3,\ldots, r.
\end{align*}
The maximal order of an $r$-stage Runge-Kutta method is bounded by $r$, and this bound is attained only for $r\leq 4$. A systematic way of finding the parameters can be found in ... Here we list only the most popular one.
\begin{figure}[htbp]
\begin{center}
\includegraphics[width=4.6in]{figures/RKlatex.pdf}
\caption{Runge-Kutta methods}
\label{fig:RK}
\end{center}
\end{figure}
\begin{tcolorbox}[colframe=black!15!white, coltitle=white!5!black, title = \bf The classical $4$-th order Runge-Kutta method]
Let $t_0=a,\, u_0 = y_0$. For $n=0,1,2,...,$
\begin{align*}
k_1 &= f(t_n, u_n),\\
k_2 & = f(t_n + \frac{1}{2}h_n, u_n + \frac{1}{2}h_n k_1),\\
k_3 & = f(t_n + \frac{1}{2}h_n, u_n + \frac{1}{2}h_n k_2),\\
k_4 & = f(t_n + h_n, u_n + h_n k_3),\\
u_{n+1} &= u_n + \frac{h_n}{6}( k_1 + 2k_2 + 2k_3 + k_4).
\end{align*}
\end{tcolorbox}
It resembles Simpson's rule for numerical quadrature:
\[
\int_{t}^{t+h} f(s)\,\mathrm{d}s \approx \frac{h}{6} \left(f(t) + 4f(t+h/2) + f(t+h)\right).
\]
When the integrand is changed to $f(s, y(s))$, approximations of $y(t+h/2)$ and $y(t+h)$ are required. Verifying the order involves a tedious Taylor expansion.
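For reference, a direct Python transcription of one classical Runge-Kutta step might read as follows (a sketch; the name \texttt{rk4\_step} is ours). Note that $k_2$ and $k_3$ both approximate the midpoint slope, matching the weight $4$ in Simpson's rule.
\begin{verbatim}
def rk4_step(f, t, u, h):
    k1 = f(t, u)
    k2 = f(t + h / 2, u + h / 2 * k1)
    k3 = f(t + h / 2, u + h / 2 * k2)
    k4 = f(t + h, u + h * k3)
    return u + h / 6 * (k1 + 2 * k2 + 2 * k3 + k4)
\end{verbatim}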
\section{Convergence Analysis}
We recall the generic one step method below.
\medskip
\begin{tcolorbox}[colframe=black!15!white, coltitle=white!5!black, title = \bf One Step Method ($\Phi$)]
Let $t_0=a,\, u_0 = y_0$. For $n=0,1,2,...,$
\begin{align*}
t_{n+1} &= t_n + h_n,\\
u_{n+1} &= u_n + h_n \Phi(t_n, u_n; h_n).
\end{align*}
\end{tcolorbox}
\subsection{Error equation}
We first set up the normed vector space and the operators. We partition the interval $[a,b]$ into a uniform grid $\mathcal T_h$ of size $h$:
$$
a = t_0 < t_1 < \cdots < t_{N} = b, \quad t_i = a + ih, \quad h = (b-a)/N.
$$
Introduce the vector space $\Gamma_h = \{ \bs v = (v_0, v_1, \ldots, v_{N})\}\cong \mathbb R^{N+1}$ equipped with $\|\cdot\|_{\infty}$ norm. As $h$ is fixed, we will write $\Phi(t_n, u_n; h_n)$ as $\Phi_h(t_n, u_n)$ to simplify the notation and emphasize the variables are $(t, y)$.
For $v\in C^1[a,b]$, define the residual operator $(R v)(t)=v^{\prime}(t)-f(t, v(t))$. Similarly, define the discrete residual operator
$$
\left(R_h \bs v\right)_n =\frac{1}{h}\left(v_{n+1}-v_n\right)-\Phi_h(t_n, v_n), \quad n =0,1,\ldots, N-1.
$$
Then, by definition, we have the equations
$$
\begin{aligned}
R y & =0, \quad &y(a) &= y_0, \\
R_h \bs u & =0, \quad &u_0 &= y_0,
\end{aligned}
$$
where $y$ is the solution to the initial value problem.
To measure the error, we need to put $y$ and $\bs u$ into the same space. For a function $v\in C[a,b]$, introduce the nodal interpolation $\bs v_I \in \Gamma_h$ as
$$
(\bs v_I)_n = v(t_n), \quad n = 0, 1, \ldots, N.
$$
Notice that $v$, as a continuous function, is defined everywhere in the interval, while $\bs v_I$ is a vector of function values defined only at the grid points.
Now we are ready to present the error equation. Since $R_h \bs u = 0$,
\begin{equation}\label{eq:error}
R_h \bs u - R_h \bs y_I = -R_h \bs y_I = T(\bs y_I ; h).
\end{equation}
\subsection{Stability}
We introduce the stability of the operator $R_h$. Consider the equation
$$
\begin{aligned}
R_h \bs w &=\bs \varepsilon
\\ w_0&=\eta_0.
\end{aligned}
$$
If there exists a constant $K>0$, independent of $h$, such that
$$
\|\bs w\|_{\infty} \leqslant K\left(\left\|\eta_0\right\|+\|\bs \varepsilon\|_{\infty}\right),
$$
we say that $R_h$ is stable.
\begin{theorem}
If ${\Phi}(t, y ; h)$ satisfies a Lipschitz condition with respect to the $y$-variables,
$$
\left\|{\Phi}_h(t, y)-{\Phi}_h\left(t, y^*\right)\right\| \leq M\left\|y-y^*\right\| \text { on }[a, b] \times \mathbb{R}^d \times\left[0, h_0\right]
$$
then the one step method defined by $\Phi$ is stable.
\end{theorem}
We precede the proof with the following useful lemma, whose proof is elementary.
\begin{lemma}\label{lm:cumsum}
Let $\left\{e_n\right\}$ be a sequence of numbers $e_n \in \mathbb{R}$ satisfying
$$
e_{n+1} \leq a_n e_n+b_n, \quad n=0,1, \ldots, N-1
$$
where $a_n>0$ and $b_n \in \mathbb{R}$. Then
$$
\begin{aligned}
e_{n+1} &\leq E_{n+1}, \text{ with }\\
E_{n+1}&=\left(\prod_{k=0}^{n} a_k\right) e_0+\sum_{k=0}^{n}\left(\prod_{\ell=k+1}^{n} a_{\ell}\right) b_k, \quad n=0,1, \ldots, N-1.
\end{aligned}
$$
\end{lemma}
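For instance, unrolling the recursion twice already reveals the pattern behind $E_{n+1}$:
$$
e_{n+1} \leq a_n e_n + b_n \leq a_n a_{n-1} e_{n-1} + a_n b_{n-1} + b_n \leq \cdots,
$$
and induction on $n$ completes the proof.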
Here is a graphical representation of the coefficients: multiplying the entries in each column together with the label beneath it, and summing over all columns, gives $E_{n+1}$.
$$
\begin{array}{ccccccc}
a_0 & & & & & & \\
a_1 & a_1 & & & & & \\
a_2 & a_2 & a_2 & & & & \\
\vdots & \vdots & \vdots & \ddots & & & \\
a_n & a_n & a_n & \cdots & a_n & a_n & 1 \\
\hline
e_0 & b_0 & b_1 & b_2 & \cdots & b_{n-1} & b_n
\end{array}
$$
We need to bound the product $\prod_{k=0}^{n} a_k \leq C$ uniformly in $n$. Fortunately, in our application $a_k = 1 + Mh$ is a small perturbation of $1$. We also need to control the accumulation of the $b_k$ sequence, $\sum_{k=0}^{N-1} |b_k|$.
\subsection{Convergence} We write out the error equation \eqref{eq:error} in detail:
\begin{equation}
\begin{aligned}
&u_{n+1} - y(t_{n+1}) - (u_n - y(t_n))\\
={}& h \left [ \Phi_h(t_n, u_n) - \Phi_h(t_n, y(t_n)) \right ] + hT_h(t_n, y(t_n)).
\end{aligned}
\end{equation}
Let $e_{n+1} = |u_{n+1} - y(t_{n+1})|$ and $b_n = h |T_h(t_n, y(t_n))|$. Using the Lipschitz continuity, we then get the inequality
$$
e_{n+1} \leq (1+ Mh)e_n + b_n.
$$
Then $a_k = 1 + Mh$ and, using $1 + x \leq e^x$,
$$
\prod_{k=0}^{N-1} a_k = (1 + Mh)^N = \left (1 + \frac{M(b-a)}{N}\right )^N \leq e^{(b-a)M}.
$$
Assuming $\|T\|_{\infty}\leq Ch^p$, the accumulation of the $b_k$ is controlled by
$$
\sum_{k=0}^{N-1}\left(\prod_{\ell=k+1}^{N-1} a_{\ell}\right) b_k \leq Ch^p e^{(b-a)M} \sum_{k=0}^{N-1} h = Ch^p e^{(b-a)M}(b-a).
$$
The extra factor $h$ in $b_k = h|T_h(t_k, y(t_k))|$ ensures that the accumulated sum is bounded by the length of the interval.
We summarize the convergence in the following theorem.
\begin{theorem}
If ${\Phi}_h(t, y)$ is Lipschitz with respect to the $y$-variables, and the truncation error $\|T_h(t_n, y(t_n))\|_{\infty}= \mathcal O(h^p)$, then
$$
\|\boldsymbol{u}-\boldsymbol{y}_I\|_{\infty}=\mathcal O\left(h^p\right) \text { as } h \rightarrow 0.
$$
\end{theorem}
\subsection{Convergence using perturbation of curves}
Parallel to using the Lipschitz continuity of $\Phi$, we can also leverage the Lipschitz continuity of $f$ by comparing curves with different initial values.
\begin{lemma}
Let $f(t, y)$ be continuous in $t$ for $a \leq t \leq b$ and satisfy Lipschitz condition for $y$ uniformly on $[a, b] \times \mathbb{R}^d$ with Lipschitz constant $L$. Then the initial value problem
\begin{equation}\label{eq:ODEc}
\begin{aligned}
\frac{\mathrm{d} y}{\mathrm{~d} t} & =f(t, y), \quad a \leq t \leq b, \\
y(c) & =y_c
\end{aligned}
\end{equation}
has a unique solution on $[a, b]$ for any $c$ with $a \leq c \leq b$ and for any $y_c \in \mathbb{R}^d$. Let $y(t; s)$ and $y\left(t ; s^*\right)$ be the solutions of \eqref{eq:ODEc} corresponding to $y_c={s}$ and $y_c=s^*$, respectively. Then, for any vector norm $\|\cdot\|$,
$$
\left\|y(t ; s)-y\left(t ; s^*\right)\right\| \leq \mathrm{e}^{L|t-c|}\left\|s-s^*\right\|, \quad a \leq t \leq b.
$$
\end{lemma}
Define
\begin{equation}\label{eq:vn}
\begin{aligned}
\frac{\dd v^n}{\dd t} &= f(t, v^n), \quad t_n \leq t \leq b, \\
v^n(t_n) &= u_n.
\end{aligned}
\end{equation}
Then we can split the error at $t_{n+1}$ as
$$
\begin{aligned}
u_{n+1} - y(t_{n+1}) &= u_{n+1} - v^n(t_{n+1}) + v^n(t_{n+1}) - y(t_{n+1}) \\
&= h T_h(t_n, u_n) + v^n(t_{n+1}) - y(t_{n+1}).
\end{aligned}
$$
\begin{figure}[htbp]
\subfigure[Use the scheme starting from $(t_n, y(t_n))$]{
\begin{minipage}[t]{0.45\linewidth}
\centering
\includegraphics*[width=5.75cm]{figures/Error1.pdf}
\end{minipage}}%%
\subfigure[Use ODE with initial value $u_n$]
{\begin{minipage}[t]{0.55\linewidth}
\centering
\quad \includegraphics*[width=5.75cm]{figures/Error2.pdf}
\end{minipage}}
\caption{Different splittings of the error $u_{n+1} - y(t_{n+1})$.}
\end{figure}
The two curves $v^n$ and $y$ solve the same ODE but with different initial values at $t_n$; compare \eqref{eq:vn} with
\begin{equation}
\begin{aligned}
\frac{\dd y}{\dd t} &= f(t, y), \quad t_n \leq t \leq b, \\
y(t_n) &= y(t_n).
\end{aligned}
\end{equation}
Then, by the lemma with $c = t_n$, $s = u_n$, and $s^* = y(t_n)$,
$$
\|v^n(t_{n+1}) - y(t_{n+1})\| \leq \mathrm{e}^{Lh}\left\|u_n - y(t_n)\right\|.
$$
So we get the inequality for the error:
$$
e_{n+1}\leq \exp(Lh) e_n + h \epsilon_T, \quad e_0 = 0, \epsilon_T = \|T_h\|_{\infty}.
$$
Applying Lemma \ref{lm:cumsum} with $a_k = \exp(Lh)$ and $b_k = h\epsilon_T$, we get
$$
e_{n+1}\leq \epsilon_T \sum_{k=0}^n h \exp(L(n-k)h)\leq \epsilon_T \int_a^b \exp (L(b-t))\dd t = \epsilon_T \frac{ \exp (L(b-a))-1}{L}.
$$
Therefore, if we want to control
$$
\| \bs u - \bs y_I\|_{\infty} \leq \epsilon,
$$
we can set the truncation error bound to
\begin{equation}
\epsilon_T \leq \frac{L}{ \exp (L(b-a))-1} \epsilon.
\end{equation}
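For example (with numbers of our own choosing), if $L = 1$ and $b - a = 1$, then a target accuracy $\epsilon = 10^{-4}$ requires $\epsilon_T \leq 10^{-4}/(e - 1) \approx 5.8 \times 10^{-5}$.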
\section{Error Expansion and Adaptive Mesh Size}
Previously we used a constant step size $h$. For some problems, the solution changes dramatically in places, and a smaller $h$ is needed there to achieve an accurate solution.
\subsection{Adaptive mesh size}
For a one-step method $\Phi$ of order $p$, we adaptively choose the step size by the following algorithm.
Parameters: $\gamma >1$, $\rho \in (0,1)$, and $\epsilon_T \ll 1$. Input: $h_0$, $u_0$, $t_0$.
\begin{enumerate}
\item Predict the step size $h_n = \gamma h_{n-1}$.
\item Compute
$$
u_{n+1} = {u}_n+h_n {\Phi}\left(t_n, {u}_n ; h_n\right)$$ and an error estimator on the truncation error
$$
\eta_n \approx T\left(t_n, \boldsymbol{u}_n ; h_n\right).
$$
\item Test whether $$\left\| \eta_n \right\| \leq \epsilon_T.$$ If the test passes, proceed to the next step; if not, repeat the step with a smaller $h_n \leftarrow \rho h_n$ (say, half as large) until the test passes.
\end{enumerate}
The key is the error estimator $\eta_n$ of the truncation error.
\subsection{Estimate of truncation error}
Recall that
$$
T(t_n, u_n; h_n) = h^{-1}(u_{n+1} - v^n(t_{n+1})),
$$
where $v^n$ is the solution of the ODE \eqref{eq:vn}, which is in general not computable. A natural idea is to use
$$
\eta_n = h^{-1}(u_{n+1} - u_{n+1}^*),
$$
where $u_{n+1}^*$ is a more accurate approximation than $u_{n+1}$.
We discuss two ways to obtain $u_{n+1}^*$: $h$-method and $p$-method.
\subsubsection{$h$-method} We simply halve the step size and take two half steps to obtain $u_{n+1}^*$:
\begin{equation}
\begin{aligned}
{u}_{n+1} & = {u}_n+h {\Phi}(t_n, u_n ; h) \\
{u}_{n+1/2} & = {u}_n +\frac{1}{2} h {\Phi}\left(t_n, u_n ; \frac{1}{2} h\right) \\
{u}_{n+1}^* & ={u}_{n+1/2}+\frac{1}{2} h {\Phi}\left(t_n+\frac{1}{2} h, {u}_{n+1/2}; \frac{1}{2} h\right).
\end{aligned}
\end{equation}
Assume $\Phi$ is of order $p$, and let $v = v^n$ denote the local exact solution \eqref{eq:vn} with $v(t_n) = u_n$. By the definition of the truncation error and its principal error function $\tau$,
\begin{equation}\label{eq:h-half}
\begin{aligned}
h^{-1}({u}_{n+1} - v(t_{n+1})) &= \tau h^p + \mathcal O(h^{p+1}),\\
\left (\frac{h}{2}\right )^{-1}({u}_{n+1/2} - v(t_{n+1/2})) &= \tau \left (\frac{h}{2}\right )^p + \mathcal O(h^{p+1}).
\end{aligned}
\end{equation}
Here $\tau = \tau(t_n, u_n)$ is treated as a constant; indeed $\tau(t_{n}+h/2, u_{n+1/2}) = \tau + \mathcal O(h)$. The second half step commits another local error $\tau (h/2)^{p+1} + \mathcal O(h^{p+2})$, while the error already present at $t_{n+1/2}$ propagates with a factor $1 + \mathcal O(h)$. Adding the two contributions gives
$$
h^{-1}({u}_{n+1}^* - v(t_{n+1})) = \tau \left (\frac{h}{2}\right )^p + \mathcal O(h^{p+1}).
$$
Subtracting this from the first identity of \eqref{eq:h-half}, we get
$$
\frac{1}{h}(u_{n+1} - u_{n+1}^*) = \left(1-2^{-p}\right) \tau h^p + \mathcal O(h^{p+1}),
$$
and therefore the computable estimator
$$
\eta_n := \frac{1}{1-2^{-p}}\, \frac{1}{h}(u_{n+1} - u_{n+1}^*) = \tau h^p + \mathcal O(h^{p+1}) \approx T(t_n, u_n; h).
$$
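The following Python sketch combines the $h$-method estimator with the adaptive loop of the previous subsection. All names (\texttt{truncation\_estimate}, \texttt{adaptive\_solve}) and parameter defaults are our own choices; for systems, the absolute value in the test should be replaced by a vector norm.
\begin{verbatim}
def improved_euler_step(f, t, u, h):      # a method of order p = 2
    k1 = f(t, u)
    return u + h * f(t + h / 2, u + h / 2 * k1)

def truncation_estimate(step, f, t, u, h, p):
    """Estimate the truncation error by step doubling (the h-method)."""
    u1 = step(f, t, u, h)                 # one step of size h
    u_half = step(f, t, u, h / 2)         # two steps of size h/2
    u1_star = step(f, t + h / 2, u_half, h / 2)
    eta = (u1 - u1_star) / (h * (1 - 2.0 ** (-p)))
    return u1_star, eta

def adaptive_solve(step, f, a, b, y0, p, h0, eps_T, gamma=2.0, rho=0.5):
    t, u, h = a, y0, h0
    ts, us = [t], [u]
    while t < b:
        h = min(gamma * h, b - t)         # tentative enlargement
        u_new, eta = truncation_estimate(step, f, t, u, h, p)
        while abs(eta) > eps_T:           # shrink until the test passes
            h *= rho
            u_new, eta = truncation_estimate(step, f, t, u, h, p)
        t, u = t + h, u_new               # keep the more accurate value
        ts.append(t); us.append(u)
    return ts, us

ts, us = adaptive_solve(improved_euler_step, lambda t, y: -y,
                        0.0, 1.0, 1.0, 2, 0.1, 1e-6)
\end{verbatim}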
\subsubsection{$p$-method} Alternatively, we keep the step size but raise the order: compute $u_{n+1}^*$ by a companion method of order $p^* > p$; embedded Runge-Kutta pairs are designed so that the two methods share most of their stage evaluations. Since $u_{n+1}^* - v^n(t_{n+1}) = \mathcal O(h^{p^*+1})$, we obtain directly $\eta_n = h^{-1}(u_{n+1} - u_{n+1}^*) = \tau h^p + \mathcal O(h^{p+1})$.
\subsection{Principal error function of the total error}
We assume the truncation error
\begin{equation}\label{eq:Tphi2}
T_h(t, y) := \frac{1}{h}(u_{\rm next} - y_{\rm next}) = \Phi_h(t, y) - \frac{1}{h} \int_{t}^{t+h} f(s, y(s)) \, \dd s
\end{equation}
has the expansion
$$
T_h(t, y) = \tau(t, y) h^p + \mathcal O(h^{p+1}),
$$
where $\tau(t,y)$ is the principal error function (of the truncation error).
We seek an analogous principal error function for the total error:
$$
u_n - y(t_n) = e(t_n) h^p + \mathcal O(h^{p+1}).
$$
What is the difference? In the definition of the truncation error, $u_{\rm next}$ and $y_{\rm next}$ share the same initial value at $t_n$, whereas $u_{n+1}$ and $y(t_{n+1})$ do not: the truncation errors accumulate from step to step.
\subsection{The principal error equation}
We claim the principal error function satisfies the following ODE.
\begin{theorem}
Let $e(t)$ be the solution of the linear initial value problem
$$
\begin{aligned}
e'(t)& =f_y(t, {y}(t)) e+{\tau}(t, {y}(t)), \quad a \leq t \leq b, \\
e(a) & = 0.
\end{aligned}
$$
Then, for $n=0,1, \ldots, N$,
$$
{u}_n-{y}\left(t_n\right)=e\left(t_n\right) h^p+O\left(h^{p+1}\right) \text { as } h \rightarrow 0.
$$
\end{theorem}
\begin{proof}
We use a Taylor series to expand
$$
\begin{aligned}
\Phi_h(t_n, u_n) - \Phi_h(t_n, y(t_n)) &= \partial_y\Phi_h(t_n, y(t_n))( u_n - y(t_n)) + \mathcal O(\|u_n - y(t_n)\|^2)\\
&= \partial_y\Phi_0(t_n, y(t_n))( u_n - y(t_n)) + \mathcal O(h^{p+1})\\
&= f_y(t_n, y(t_n))( u_n - y(t_n)) + \mathcal O(h^{p+1}).
\end{aligned}
$$
To distinguish the continuous error function $e(t)$ from its numerical approximation, we denote
$$
r_n = h^{-p} ({u}_n-{y}\left(t_n\right)).
$$
Then $r_0 = 0$ and for $n=0,1, \ldots, N-1$
$$
\begin{gathered}
\frac{1}{h}\left(r_{n+1}-r_n\right)=f_y\left(t_n, {y}\left(t_n\right)\right) r_n+{\tau}\left(t_n, {y}\left(t_n\right)\right)+O(h),
\end{gathered}
$$
which is
$$
\left(R_h^{\text {Euler}} \bs r\right)_n=\boldsymbol{\varepsilon}_n, \quad \varepsilon_n=\mathcal O(h),
$$
where $R_h^{\text{Euler}}$ is the residual operator of the forward Euler method applied to the linear ODE satisfied by $e$. By the stability of the forward Euler method and $r_0 = e(a) = 0$, we conclude $r_n = e(t_n) + \mathcal O(h)$; multiplying by $h^p$ gives the result.
\end{proof}
\end{document}