|
21 | 21 | },
|
22 | 22 | {
|
23 | 23 | "cell_type": "code",
|
24 |
| - "execution_count": 1, |
25 |
| - "metadata": {}, |
26 |
| - "outputs": [ |
27 |
| - { |
28 |
| - "name": "stdout", |
29 |
| - "output_type": "stream", |
30 |
| - "text": [ |
31 |
| - "+ curl -O -# https://www.gutenberg.org/files/2600/2600-0.txt\n", |
32 |
| - "######################################################################### 100.0%\n", |
33 |
| - "+ curl -O -# https://www.gutenberg.org/files/8800/8800.txt\n", |
34 |
| - "######################################################################### 100.0%\n", |
35 |
| - "+ curl -O -# https://www.gutenberg.org/files/84/84-0.txt\n", |
36 |
| - "######################################################################### 100.0%\n", |
37 |
| - "+ curl -O -# https://www.gutenberg.org/files/2701/2701-0.txt\n", |
38 |
| - "######################################################################### 100.0%\n", |
39 |
| - "+ curl -O -# https://www.gutenberg.org/files/35/35-0.txt\n", |
40 |
| - "######################################################################### 100.0%\n", |
41 |
| - "+ curl -O -# https://www.gutenberg.org/files/1342/1342-0.txt\n", |
42 |
| - "######################################################################### 100.0%\n", |
43 |
| - "+ curl -O -# https://www.gutenberg.org/files/3825/3825-0.txt\n", |
44 |
| - "######################################################################### 100.0%\n", |
45 |
| - "+ curl -O -# https://www.gutenberg.org/files/996/996-0.txt\n", |
46 |
| - "######################################################################### 100.0%\n", |
47 |
| - "+ curl -O -# https://www.gutenberg.org/files/55/55-0.txt\n", |
48 |
| - "######################################################################### 100.0%\n", |
49 |
| - "+ curl -O -# https://www.gutenberg.org/files/6130/6130-0.txt\n", |
50 |
| - "######################################################################### 100.0%\n", |
51 |
| - "+ curl -O -# https://www.gutenberg.org/files/1727/1727-0.txt\n", |
52 |
| - "######################################################################### 100.0%\n" |
53 |
| - ] |
54 |
| - } |
55 |
| - ], |
| 24 | + "execution_count": null, |
| 25 | + "metadata": {}, |
| 26 | + "outputs": [], |
56 | 27 | "source": [
|
57 | 28 | "!./books.sh"
|
58 | 29 | ]
|
|
68 | 39 | },
|
69 | 40 | {
|
70 | 41 | "cell_type": "code",
|
71 |
| - "execution_count": 2, |
72 |
| - "metadata": {}, |
73 |
| - "outputs": [ |
74 |
| - { |
75 |
| - "name": "stdout", |
76 |
| - "output_type": "stream", |
77 |
| - "text": [ |
78 |
| - "Input size 11451743 chars.\n", |
79 |
| - "Assembled 99632 nodes on 1 domains in 267ms.\n" |
80 |
| - ] |
81 |
| - } |
82 |
| - ], |
83 |
| - "source": [ |
84 |
| - "!g++ -std=c++20 -o tree starting_point.cpp -ltbb\n", |
| 42 | + "execution_count": null, |
| 43 | + "metadata": {}, |
| 44 | + "outputs": [], |
| 45 | + "source": [ |
| 46 | + "!g++ -std=c++20 -Ofast -march=native -o tree starting_point.cpp -ltbb\n", |
85 | 47 | "!./tree"
|
86 | 48 | ]
|
87 | 49 | },
|
|
95 | 57 | "\n",
|
96 | 58 | "## Exercise 1: process the input in parallel\n",
|
97 | 59 | "\n",
|
98 |
| - "The goal of this exercise is to process the input in parallel using multiple domains." |
| 60 | + "The goal of this exercise is to process the input in parallel using multiple domains.\n", |
| 61 | + "\n", |
| 62 | + "A template for the solution is provided in [exercise1.cpp]. The `TODO`s indicate the parts of the template that must be completed.\n", |
| 63 | + "\n", |
| 64 | + "[exercise1.cpp]: ./exercise1.cpp\n", |
| 65 | + "\n", |
| 66 | + "The example compiles and runs serially as provided.\n", |
| 67 | + "Once you parallelize it, the following blocks should compile and run correctly:" |
| 68 | + ] |
| 69 | + }, |
| 70 | + { |
| 71 | + "cell_type": "code", |
| 72 | + "execution_count": null, |
| 73 | + "metadata": {}, |
| 74 | + "outputs": [], |
| 75 | + "source": [ |
| 76 | + "!g++ -std=c++20 -Ofast -march=native -o tree exercise1.cpp -ltbb\n", |
| 77 | + "!./tree" |
| 78 | + ] |
| 79 | + }, |
| 80 | + { |
| 81 | + "cell_type": "code", |
| 82 | + "execution_count": null, |
| 83 | + "metadata": {}, |
| 84 | + "outputs": [], |
| 85 | + "source": [ |
| 86 | + "!clang++ -std=c++20 -Ofast -march=native -o tree exercise1.cpp -ltbb\n", |
| 87 | + "!./tree" |
| 88 | + ] |
| 89 | + }, |
| 90 | + { |
| 91 | + "cell_type": "code", |
| 92 | + "execution_count": null, |
| 93 | + "metadata": {}, |
| 94 | + "outputs": [], |
| 95 | + "source": [ |
| 96 | + "!nvc++ -std=c++20 -stdpar=multicore -O4 -fast -march=native -Mllvm-fast -o tree exercise1.cpp\n", |
| 97 | + "!./tree" |
99 | 98 | ]
|
100 | 99 | },
|
101 | 100 | {
|
|
106 | 105 | "\n",
|
107 | 106 | "The solutions for each example are available in the `solutions/` sub-directory.\n",
|
108 | 107 | "\n",
|
109 |
| - "The following compiles and runs the solutions for Exercise 0 using different compilers." |
| 108 | + "The following compiles and runs the solutions for Exercise 1 using different compilers." |
110 | 109 | ]
|
111 | 110 | },
|
112 | 111 | {
|
113 | 112 | "cell_type": "code",
|
114 |
| - "execution_count": 3, |
115 |
| - "metadata": {}, |
116 |
| - "outputs": [ |
117 |
| - { |
118 |
| - "name": "stdout", |
119 |
| - "output_type": "stream", |
120 |
| - "text": [ |
121 |
| - "Input size 11451743 chars.\n", |
122 |
| - "Assembled 99632 nodes on 1 domains in 232ms.\n", |
123 |
| - "Assembled 99632 nodes on 16 domains in 201ms.\n", |
124 |
| - "Assembled 99632 nodes on 100000 domains in 166ms.\n" |
125 |
| - ] |
126 |
| - } |
127 |
| - ], |
128 |
| - "source": [ |
129 |
| - "!g++ -std=c++20 -Ofast -DNDEBUG -o tree solutions/exercise0.cpp\n", |
| 113 | + "execution_count": null, |
| 114 | + "metadata": {}, |
| 115 | + "outputs": [], |
| 116 | + "source": [ |
| 117 | + "!g++ -std=c++20 -Ofast -march=native -DNDEBUG -o tree solutions/exercise1.cpp -ltbb\n", |
130 | 118 | "!./tree"
|
131 | 119 | ]
|
132 | 120 | },
|
133 | 121 | {
|
134 | 122 | "cell_type": "code",
|
135 |
| - "execution_count": 6, |
136 |
| - "metadata": {}, |
137 |
| - "outputs": [ |
138 |
| - { |
139 |
| - "name": "stdout", |
140 |
| - "output_type": "stream", |
141 |
| - "text": [ |
142 |
| - "nvvmCompileProgram error 9: NVVM_ERROR_COMPILATION.\n", |
143 |
| - "Error: /tmp/pgacc3rBLuXtV9Je.gpu (1281, 38): parse atomicrmw value and pointer type do not match\n", |
144 |
| - "NVC++-F-0155-Compiler failed to translate accelerator region (see -Minfo messages): Device compiler exited with error status code (solutions/exercise0.cpp: 1)\n", |
145 |
| - "NVC++/x86-64 Linux 22.5-0: compilation aborted\n", |
146 |
| - "Input size 11451743 chars.\n", |
147 |
| - "Assembled 99632 nodes on 1 domains in 6244ms.\n", |
148 |
| - "Assembled 99632 nodes on 16 domains in 726ms.\n", |
149 |
| - "Assembled 99632 nodes on 100000 domains in 22ms.\n" |
150 |
| - ] |
151 |
| - } |
152 |
| - ], |
153 |
| - "source": [ |
154 |
| - "# A GPU version using cuda::std::atomic is available:\n", |
155 |
| - "!nvc++ -std=c++20 -stdpar=gpu -gpu=cc80 -fast -DNDEBUG -o tree solutions/exercise0.cpp\n", |
| 123 | + "execution_count": null, |
| 124 | + "metadata": {}, |
| 125 | + "outputs": [], |
| 126 | + "source": [ |
| 127 | + "!clang++ -std=c++20 -Ofast -march=native -DNDEBUG -o tree solutions/exercise1.cpp -ltbb\n", |
| 128 | + "!./tree" |
| 129 | + ] |
| 130 | + }, |
| 131 | + { |
| 132 | + "cell_type": "code", |
| 133 | + "execution_count": null, |
| 134 | + "metadata": {}, |
| 135 | + "outputs": [], |
| 136 | + "source": [ |
| 137 | + "!nvc++ -std=c++20 -stdpar=multicore -O4 -fast -march=native -Mllvm-fast -DNDEBUG -o tree solutions/exercise1.cpp\n", |
| 138 | + "!./tree" |
| 139 | + ] |
| 140 | + }, |
| 141 | + { |
| 142 | + "cell_type": "markdown", |
| 143 | + "metadata": {}, |
| 144 | + "source": [ |
| 145 | + "Currently, not all `std::atomic` operations are supported on GPUs.\n", |
| 146 | + "The CUDA Toolkit, which ships with the HPC SDK, includes [libcudacxx](https://github.com/NVIDIA/libcudacxx), the CUDA C++ standard library.\n", |
| 147 | + "This library provides `cuda::atomic` and similar types in the `<cuda/atomic>` header, and these types can be used on GPUs." |
| 148 | + ] |
| 149 | + }, |
| 150 | + { |
| 151 | + "cell_type": "code", |
| 152 | + "execution_count": null, |
| 153 | + "metadata": {}, |
| 154 | + "outputs": [], |
| 155 | + "source": [ |
| 156 | + "!g++ -std=c++20 -Ofast -march=native -DNDEBUG -o tree solutions/exercise1_gpu.cpp -ltbb\n", |
| 157 | + "!./tree" |
| 158 | + ] |
| 159 | + }, |
| 160 | + { |
| 161 | + "cell_type": "code", |
| 162 | + "execution_count": null, |
| 163 | + "metadata": {}, |
| 164 | + "outputs": [], |
| 165 | + "source": [ |
| 166 | + "!clang++ -std=c++20 -Ofast -march=native -DNDEBUG -o tree solutions/exercise1_gpu.cpp -ltbb\n", |
| 167 | + "!./tree" |
| 168 | + ] |
| 169 | + }, |
| 170 | + { |
| 171 | + "cell_type": "code", |
| 172 | + "execution_count": null, |
| 173 | + "metadata": {}, |
| 174 | + "outputs": [], |
| 175 | + "source": [ |
| 176 | + "!nvc++ -std=c++20 -stdpar=gpu -O4 -fast -march=native -Mllvm-fast -DNDEBUG -o tree solutions/exercise1_gpu.cpp\n", |
156 | 177 | "!./tree"
|
157 | 178 | ]
|
158 | 179 | }
|
|
173 | 194 | "name": "python",
|
174 | 195 | "nbconvert_exporter": "python",
|
175 | 196 | "pygments_lexer": "ipython3",
|
176 |
| - "version": "3.8.10" |
| 197 | + "version": "3.10.6" |
177 | 198 | }
|
178 | 199 | },
|
179 | 200 | "nbformat": 4,
|
|
0 commit comments