|
1 | 1 | {
|
2 | 2 | "cells": [
|
3 | 3 | {
|
4 |
| - "cell_type": "code", |
5 |
| - "execution_count": null, |
| 4 | + "cell_type": "markdown", |
| 5 | + "metadata": {}, |
| 6 | + "source": [ |
| 7 | + "# Non-Associativity\n", |
| 8 | + "\n", |
| 9 | + "$$\n", |
| 10 | + " a + (b + c) \\neq (a + b) + c\n", |
| 11 | + "$$" |
| 12 | + ] |
| 13 | + }, |
| 14 | + { |
| 15 | + "cell_type": "markdown", |
6 | 16 | "metadata": {},
|
7 |
| - "outputs": [], |
8 | 17 | "source": [
|
| 18 | + "Seen at: https://medium.com/@anis.zakari/changing-the-gpu-is-changing-the-behaviour-of-your-llm-0e6dd8dfaaae" |
| 19 | + ] |
| 20 | + }, |
| 21 | + { |
| 22 | + "cell_type": "markdown", |
| 23 | + "metadata": {}, |
| 24 | + "source": [ |
| 25 | + "The original code snippet used PyTorch with `bfloat16` (brain floating point) tensors:\n", |
| 26 | + "\n", |
| 27 | + "```python\n", |
9 | 28 | "import torch\n",
|
10 | 29 | "\n",
|
11 |
| - "a = torch.tensor(1e10, dtype=torch.float16)\n", |
12 |
| - "b = torch.tensor(1.0, dtype=torch.float16)\n", |
13 |
| - "c = torch.tensor(-1e10, dtype=torch.float16)" |
| 30 | + "a = torch.tensor(1e10, dtype=torch.bfloat16)\n", |
| 31 | + "b = torch.tensor(-1e10, dtype=torch.bfloat16)\n", |
| 32 | + "c = torch.tensor(1.0, dtype=torch.bfloat16)\n", |
| 33 | + "```" |
14 | 34 | ]
|
15 | 35 | },
|
16 | 36 | {
|
17 | 37 | "cell_type": "code",
|
18 |
| - "execution_count": null, |
| 38 | + "execution_count": 26, |
19 | 39 | "metadata": {},
|
20 | 40 | "outputs": [],
|
| 41 | + "source": [ |
| 42 | + "import numpy as np\n", |
| 43 | + "\n", |
| 44 | + "a = np.array(1e4, dtype=np.float16)\n", |
| 45 | + "b = np.array(-1e4, dtype=np.float16)\n", |
| 46 | + "c = np.array(1, dtype=np.float16)" |
| 47 | + ] |
| 48 | + }, |
| 49 | + { |
| 50 | + "cell_type": "code", |
| 51 | + "execution_count": 32, |
| 52 | + "metadata": {}, |
| 53 | + "outputs": [ |
| 54 | + { |
| 55 | + "data": { |
| 56 | + "text/plain": [ |
| 57 | + "np.False_" |
| 58 | + ] |
| 59 | + }, |
| 60 | + "execution_count": 32, |
| 61 | + "metadata": {}, |
| 62 | + "output_type": "execute_result" |
| 63 | + } |
| 64 | + ], |
| 65 | + "source": [ |
| 66 | + "a + (b + c) == (a + b) + c" |
| 67 | + ] |
| 68 | + }, |
| 69 | + { |
| 70 | + "cell_type": "code", |
| 71 | + "execution_count": 33, |
| 72 | + "metadata": {}, |
| 73 | + "outputs": [ |
| 74 | + { |
| 75 | + "data": { |
| 76 | + "text/plain": [ |
| 77 | + "np.float16(0.0)" |
| 78 | + ] |
| 79 | + }, |
| 80 | + "execution_count": 33, |
| 81 | + "metadata": {}, |
| 82 | + "output_type": "execute_result" |
| 83 | + } |
| 84 | + ], |
21 | 85 | "source": [
|
22 | 86 | "a + (b + c)"
|
23 | 87 | ]
|
24 | 88 | },
|
25 | 89 | {
|
26 | 90 | "cell_type": "code",
|
27 |
| - "execution_count": null, |
| 91 | + "execution_count": 34, |
28 | 92 | "metadata": {},
|
29 |
| - "outputs": [], |
| 93 | + "outputs": [ |
| 94 | + { |
| 95 | + "data": { |
| 96 | + "text/plain": [ |
| 97 | + "np.float16(1.0)" |
| 98 | + ] |
| 99 | + }, |
| 100 | + "execution_count": 34, |
| 101 | + "metadata": {}, |
| 102 | + "output_type": "execute_result" |
| 103 | + } |
| 104 | + ], |
30 | 105 | "source": [
|
31 | 106 | "(a + b) + c"
|
32 | 107 | ]
|
|
39 | 114 | "name": "python3"
|
40 | 115 | },
|
41 | 116 | "language_info": {
|
| 117 | + "codemirror_mode": { |
| 118 | + "name": "ipython", |
| 119 | + "version": 3 |
| 120 | + }, |
| 121 | + "file_extension": ".py", |
| 122 | + "mimetype": "text/x-python", |
42 | 123 | "name": "python",
|
| 124 | + "nbconvert_exporter": "python", |
| 125 | + "pygments_lexer": "ipython3", |
43 | 126 | "version": "3.12.2"
|
44 | 127 | }
|
45 | 128 | },
|
|
0 commit comments